[x265] [PATCH] Modify TEncSearch structure to support multiple color space formats
ashok at multicorewareinc.com
ashok at multicorewareinc.com
Tue Jan 7 12:16:58 CET 2014
# HG changeset patch
# User ashok at multicorewareinc.com
# Date 1389093279 -19800
# Tue Jan 07 16:44:39 2014 +0530
# Node ID f7d21da102acf8d88be3f6ea6b6db5dc12134cdb
# Parent 4811da38078cd02434f7da1dcc1b0af4dcf5adb8
Modify TEncSearch structure to support multiple color space formats
diff -r 4811da38078c -r f7d21da102ac source/Lib/TLibCommon/CommonDef.h
--- a/source/Lib/TLibCommon/CommonDef.h Mon Jan 06 23:15:58 2014 -0600
+++ b/source/Lib/TLibCommon/CommonDef.h Tue Jan 07 16:44:39 2014 +0530
@@ -88,6 +88,9 @@
#define MLS_GRP_NUM 64 ///< G644 : Max number of coefficient groups, max(16, 64)
#define MLS_CG_SIZE 4 ///< G644 : Coefficient group size of 4x4
+#define MLS_CG_LOG2_WIDTH 2 ///< presumably log2 of the coefficient group width (1 << 2 == MLS_CG_SIZE) - TODO confirm
+#define MLS_CG_LOG2_HEIGHT 2 ///< presumably log2 of the coefficient group height (1 << 2 == MLS_CG_SIZE) - TODO confirm
+
#define ARL_C_PRECISION 7 ///< G382: 7-bit arithmetic precision
#define LEVEL_RANGE 30 ///< G382: max coefficient level in statistics collection
diff -r 4811da38078c -r f7d21da102ac source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jan 06 23:15:58 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jan 07 16:44:39 2014 +0530
@@ -229,7 +229,7 @@
if (bChroma)
{
- if (trSizeLog2 > 2)
+ if ((trSizeLog2 > 2) && !(cu->getChromaFormat() == CHROMA_444))
{
if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepth - 1))
m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_CHROMA_U, trDepth);
@@ -275,7 +275,7 @@
return;
}
- if (ttype != TEXT_LUMA && trSizeLog2 == 2)
+ if ( (ttype != TEXT_LUMA) && (trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
{
assert(trDepth > 0);
trDepth--;
@@ -288,9 +288,11 @@
}
//===== coefficients =====
- uint32_t width = cu->getWidth(0) >> (trDepth + chroma);
- uint32_t height = cu->getHeight(0) >> (trDepth + chroma);
- uint32_t coeffOffset = (cu->getPic()->getMinCUWidth() * cu->getPic()->getMinCUHeight() * absPartIdx) >> (chroma << 1);
+ int cspx = chroma ? m_hChromaShift : 0;
+ int cspy = chroma ? m_vChromaShift : 0;
+ uint32_t width = cu->getWidth(0) >> (trDepth + cspx);
+ uint32_t height = cu->getHeight(0) >> (trDepth + cspy);
+ uint32_t coeffOffset = (cu->getPic()->getMinCUWidth() >> cspx) * (cu->getPic()->getMinCUHeight() >> cspy) * absPartIdx;
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
TCoeff* coeff = 0;
switch (ttype)
@@ -363,12 +365,23 @@
}
}
}
+
if (bChroma)
{
// chroma prediction mode
- if (absPartIdx == 0)
+ if ((cu->getPartitionSize(0) == SIZE_2Nx2N) || !(cu->getChromaFormat() == CHROMA_444))
{
- m_entropyCoder->encodeIntraDirModeChroma(cu, 0, true);
+ if (absPartIdx == 0)
+ {
+ m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx, true);
+ }
+ }
+ else
+ {
+ uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
+ assert(trDepth > 0);
+ if ((absPartIdx%qtNumParts) == 0)
+ m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx, true);
}
}
}
@@ -475,7 +488,7 @@
int lastPos = -1;
cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
m_trQuant->selectLambda(TEXT_LUMA);
absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, height, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
@@ -520,7 +533,7 @@
uint32_t fullDepth = cu->getDepth(0) + trDepth;
uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> fullDepth] + 2;
- if (trSizeLog2 == 2)
+ if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
{
assert(trDepth > 0);
trDepth--;
@@ -534,7 +547,7 @@
TextType ttype = (chromaId > 0 ? TEXT_CHROMA_V : TEXT_CHROMA_U);
uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdx);
- uint32_t width = cu->getWidth(0) >> (trDepth + m_hChromaShift);
+ uint32_t width = cu->getWidth(0) >> (trDepth + m_hChromaShift);
uint32_t height = cu->getHeight(0) >> (trDepth + m_vChromaShift);
uint32_t stride = fencYuv->getCStride();
Pel* fenc = (chromaId > 0 ? fencYuv->getCrAddr(absPartIdx) : fencYuv->getCbAddr(absPartIdx));
@@ -543,10 +556,10 @@
Pel* recon = (chromaId > 0 ? predYuv->getCrAddr(absPartIdx) : predYuv->getCbAddr(absPartIdx));
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
- uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> 2;
+ uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> (m_hChromaShift + m_vChromaShift);
TCoeff* coeff = (chromaId > 0 ? m_qtTempCoeffCr[qtlayer] : m_qtTempCoeffCb[qtlayer]) + numCoeffPerInc * absPartIdx;
int16_t* reconQt = (chromaId > 0 ? m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx) : m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx));
- assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
+ uint32_t reconQtStride = m_qtTempTComYuv[qtlayer].m_cwidth;
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
Pel* reconIPred = (chromaId > 0 ? cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder) : cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder));
@@ -557,7 +570,7 @@
//===== update chroma mode =====
if (chromaPredMode == DM_CHROMA_IDX)
{
- chromaPredMode = cu->getLumaIntraDir(0);
+ chromaPredMode = cu->getLumaIntraDir(absPartIdx);
}
//===== init availability pattern =====
@@ -565,11 +578,11 @@
{
cu->getPattern()->initPattern(cu, trDepth, absPartIdx);
- cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight);
+ cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, chromaId);
Pel* chromaPred = (chromaId > 0 ? cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) : cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));
//===== get prediction signal =====
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width);
+ predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, cu->getChromaFormat());
// save prediction
if (default0Save1Load2 == 1)
@@ -612,7 +625,7 @@
{
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
}
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
m_trQuant->selectLambda(TEXT_CHROMA);
@@ -639,7 +652,7 @@
//===== reconstruction =====
assert(((uint32_t)(size_t)residual & (width - 1)) == 0);
assert(width <= 32);
- primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, MAX_CU_SIZE / 2, reconIPredStride);
+ primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
//===== update distortion =====
uint32_t dist = primitives.sse_pp[part](fenc, stride, recon, stride);
@@ -702,11 +715,11 @@
uint32_t singleCbfY = 0;
uint32_t singleCbfU = 0;
uint32_t singleCbfV = 0;
- bool checkTransformSkip = cu->getSlice()->getPPS()->getUseTransformSkip();
+ bool checkTransformSkip = cu->getSlice()->getPPS()->getUseTransformSkip();
uint32_t widthTransformSkip = cu->getWidth(0) >> trDepth;
uint32_t heightTransformSkip = cu->getHeight(0) >> trDepth;
- int bestModeId = 0;
- int bestModeIdUV[2] = { 0, 0 };
+ int bestModeId = 0;
+ int bestModeIdUV[2] = { 0, 0 };
checkTransformSkip &= (widthTransformSkip == 4 && heightTransformSkip == 4);
checkTransformSkip &= (!cu->getCUTransquantBypass(0));
@@ -729,8 +742,8 @@
uint32_t singleCbfUTmp = 0;
uint32_t singleCbfVTmp = 0;
uint64_t singleCostTmp = 0;
- int default0Save1Load2 = 0;
- int firstCheckId = 0;
+ int default0Save1Load2 = 0;
+ int firstCheckId = 0;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + (trDepth - 1)) << 1);
bool bFirstQ = ((absPartIdx % qpdiv) == 0);
@@ -964,17 +977,17 @@
if (!bLumaOnly)
{
- width >>= 1;
- height >>= 1;
+ width >>= m_hChromaShift;
+ height >>= m_vChromaShift;
src = m_qtTempTComYuv[qtLayer].getCbAddr(absPartIdx);
- assert(m_qtTempTComYuv[qtLayer].m_cwidth == MAX_CU_SIZE / 2);
+ uint32_t srcstride = m_qtTempTComYuv[qtLayer].m_cwidth;
dst = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
dststride = cu->getPic()->getPicYuvRec()->getCStride();
- primitives.blockcpy_ps(width, height, dst, dststride, src, MAX_CU_SIZE / 2);
+ primitives.blockcpy_ps(width, height, dst, dststride, src, srcstride);
src = m_qtTempTComYuv[qtLayer].getCrAddr(absPartIdx);
dst = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
- primitives.blockcpy_ps(width, height, dst, dststride, src, MAX_CU_SIZE / 2);
+ primitives.blockcpy_ps(width, height, dst, dststride, src, srcstride);
}
}
@@ -1049,7 +1062,7 @@
int lastPos = -1;
cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
m_trQuant->selectLambda(TEXT_LUMA);
absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, height, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
@@ -1081,7 +1094,6 @@
if (bCheckSplit && !bCheckFull)
{
//----- code splitted block -----
-
uint32_t qPartsDiv = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
uint32_t absPartIdxSub = absPartIdx;
uint32_t splitCbfY = 0;
@@ -1267,12 +1279,12 @@
reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zOrder);
reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
reconQt = m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx);
- assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);
+ uint32_t reconQtStride = m_qtTempTComYuv[qtlayer].m_cwidth;
+ primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);
reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zOrder);
reconQt = m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx);
- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);
+ primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);
}
}
@@ -1376,20 +1388,20 @@
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
uint32_t width = cu->getWidth(0) >> (trDepth + 1);
uint32_t height = cu->getHeight(0) >> (trDepth + 1);
- assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
+ uint32_t reconQtStride = m_qtTempTComYuv[qtlayer].m_cwidth;
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
if (stateU0V1Both2 == 0 || stateU0V1Both2 == 2)
{
Pel* reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
int16_t* reconQt = m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx);
- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);
+ primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);
}
if (stateU0V1Both2 == 1 || stateU0V1Both2 == 2)
{
Pel* reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
int16_t* reconQt = m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx);
- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);
+ primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);
}
}
}
@@ -1411,7 +1423,7 @@
uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> fullDepth] + 2;
uint32_t actualTrDepth = trDepth;
- if (trSizeLog2 == 2)
+ if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
{
assert(trDepth > 0);
actualTrDepth--;
@@ -1557,7 +1569,7 @@
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
bool bChromaSame = false;
- if (trSizeLog2 == 2)
+ if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
{
assert(trDepth > 0);
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth - 1) << 1);
@@ -1572,9 +1584,11 @@
uint32_t numCoeffC = (cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight()) >> (fullDepth << 1);
if (!bChromaSame)
{
- numCoeffC >>= 2;
+ numCoeffC = ((cu->getSlice()->getSPS()->getMaxCUWidth() >> m_hChromaShift) * (cu->getSlice()->getSPS()->getMaxCUHeight() >> m_vChromaShift)) >> (fullDepth << 1);
}
- uint32_t numCoeffIncC = (cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight()) >> ((cu->getSlice()->getSPS()->getMaxCUDepth() << 1) + 2);
+
+ uint32_t numCoeffIncC = ((cu->getSlice()->getSPS()->getMaxCUWidth() >> m_hChromaShift) * (cu->getSlice()->getSPS()->getMaxCUHeight() >> m_vChromaShift)) >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
+
TCoeff* coeffSrcU = m_qtTempCoeffCb[qtlayer] + (numCoeffIncC * absPartIdx);
TCoeff* coeffSrcV = m_qtTempCoeffCr[qtlayer] + (numCoeffIncC * absPartIdx);
TCoeff* coeffDstU = cu->getCoeffCb() + (numCoeffIncC * absPartIdx);
@@ -1583,7 +1597,7 @@
::memcpy(coeffDstV, coeffSrcV, sizeof(TCoeff) * numCoeffC);
//===== copy reconstruction =====
- uint32_t trSizeCLog2 = (bChromaSame ? trSizeLog2 : trSizeLog2 - 1);
+ uint32_t trSizeCLog2 = (bChromaSame || (cu->getChromaFormat() == CHROMA_444)) ? trSizeLog2 : trSizeLog2 - 1;
m_qtTempTComYuv[qtlayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2);
}
else
@@ -1650,11 +1664,11 @@
}
//===== init availability pattern =====
cu->getPattern()->initPattern(cu, trDepth, absPartIdx);
- cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight);
+ cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, chromaId);
Pel* chromaPred = (chromaId > 0 ? cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) : cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));
//===== get prediction signal =====
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width);
+ predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, cu->getChromaFormat());
//===== get residual signal =====
assert(!((uint32_t)(size_t)fenc & (width - 1)));
@@ -1676,7 +1690,7 @@
{
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
}
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
m_trQuant->selectLambda(TEXT_CHROMA);
@@ -1741,7 +1755,8 @@
//===== init pattern =====
assert(width == height);
cu->getPattern()->initPattern(cu, 0, 0);
- cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf, m_predBufStride, m_predBufHeight);
+ cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf, m_predBufStride, m_predBufHeight, 0/*chromaId*/);
+ cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf, m_predBufStride, m_predBufHeight, 1/*chromaId*/);
Pel* patChromaU = cu->getPattern()->getAdiCbBuf(width, height, m_predBuf);
Pel* patChromaV = cu->getPattern()->getAdiCrBuf(width, height, m_predBuf);
@@ -1754,8 +1769,8 @@
for (uint32_t mode = minMode; mode < maxMode; mode++)
{
//--- get prediction ---
- predIntraChromaAng(patChromaU, mode, predU, stride, width);
- predIntraChromaAng(patChromaV, mode, predV, stride, width);
+ predIntraChromaAng(patChromaU, mode, predU, stride, width, height, cu->getChromaFormat());
+ predIntraChromaAng(patChromaV, mode, predV, stride, width, height, cu->getChromaFormat());
//--- get SAD ---
uint32_t sad = sa8d(fencU, stride, predU, stride) + sa8d(fencV, stride, predV, stride);
@@ -2131,13 +2146,14 @@
if (width > 32)
{
- scaleWidth = 32;
- scaleStride = 32;
+ scaleWidth = 32;
+ scaleStride = 32;
costMultiplier = 4;
}
cu->getPattern()->initPattern(cu, trDepth, absPartIdx);
- cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight);
+ cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, 0);
+ cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, 1);
cu->getAllowedChromaDir(0, modeList);
//----- check chroma modes -----
@@ -2156,7 +2172,7 @@
Pel* chromaPred = (chromaId > 0 ? cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) : cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));
//===== get prediction signal =====
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width);
+ predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, cu->getChromaFormat());
int log2SizeMinus2 = g_convertToBit[scaleWidth];
pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
sad = costMultiplier * sa8d(fenc, scaleStride, pred, scaleStride);
@@ -2174,6 +2190,27 @@
cu->setChromIntraDirSubParts(bestMode, 0, depth);
}
+// Steps the section iteration state forward by one section.
+//
+// m_section is always incremented. When m_splitMode is anything other than
+// DONT_SPLIT, m_absPartIdxTURelCU is first advanced by m_absPartIdxStep.
+//
+// Returns false unconditionally for DONT_SPLIT; otherwise returns true while
+// the incremented m_section is still below (1 << m_splitMode).
+bool TEncSearch::isNextSection()
+{
+    const bool splitting = (m_splitMode != DONT_SPLIT);
+
+    if (splitting)
+    {
+        // move the TU partition index (relative to the CU) to the next section
+        m_absPartIdxTURelCU += m_absPartIdxStep;
+    }
+
+    m_section++;
+
+    if (!splitting)
+    {
+        return false;
+    }
+
+    const int sectionCount = 1 << m_splitMode;
+    return m_section < sectionCount;
+}
+
+// Reports whether the current section is the final one, i.e. whether
+// m_section + 1 has reached (1 << m_splitMode). Does not modify any state.
+bool TEncSearch::isLastSection()
+{
+    const int sectionCount = 1 << m_splitMode;
+    return sectionCount <= (m_section + 1);
+}
+
void TEncSearch::estIntraPredChromaQT(TComDataCU* cu,
TComYuv* fencYuv,
TComYuv* predYuv,
@@ -2181,60 +2218,109 @@
TComYuv* reconYuv,
uint32_t preCalcDistC)
{
- uint32_t depth = cu->getDepth(0);
- uint32_t bestMode = 0;
- uint32_t bestDist = 0;
- uint64_t bestCost = MAX_INT64;
-
- //----- init mode list -----
- uint32_t minMode = 0;
- uint32_t maxMode = NUM_CHROMA_MODE;
- uint32_t modeList[NUM_CHROMA_MODE];
-
- cu->getAllowedChromaDir(0, modeList);
-
- //----- check chroma modes -----
- for (uint32_t mode = minMode; mode < maxMode; mode++)
+ uint32_t depth = cu->getDepth(0);
+ uint32_t initTrDepth = (cu->getPartitionSize(0) != SIZE_2Nx2N) && (cu->getChromaFormat() == CHROMA_444 ? 1 : 0);
+ m_splitMode = (initTrDepth == 0) ? DONT_SPLIT : QUAD_SPLIT;
+ m_absPartIdxStep = (cu->getPic()->getNumPartInCU() >> (depth << 1)) >> partIdxStepShift[m_splitMode];
+ m_partOffset = 0;
+ m_section = 0;
+ m_absPartIdxTURelCU = 0;
+
+ do
{
- //----- restore context models -----
- m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
-
- //----- chroma coding -----
- uint32_t dist = 0;
- cu->setChromIntraDirSubParts(modeList[mode], 0, depth);
- xRecurIntraChromaCodingQT(cu, 0, 0, fencYuv, predYuv, resiYuv, dist);
- if (cu->getSlice()->getPPS()->getUseTransformSkip())
+ uint32_t bestMode = 0;
+ uint32_t bestDist = 0;
+ uint64_t bestCost = MAX_INT64;
+
+ //----- init mode list -----
+ uint32_t minMode = 0;
+ uint32_t maxMode = NUM_CHROMA_MODE;
+ uint32_t modeList[NUM_CHROMA_MODE];
+
+ m_partOffset = m_absPartIdxTURelCU;
+
+ cu->getAllowedChromaDir(m_partOffset, modeList);
+
+ //----- check chroma modes -----
+ for (uint32_t mode = minMode; mode < maxMode; mode++)
{
+ //----- restore context models -----
m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
+
+ //----- chroma coding -----
+ uint32_t dist = 0;
+
+ cu->setChromIntraDirSubParts(modeList[mode], m_partOffset, depth + initTrDepth);
+
+ xRecurIntraChromaCodingQT(cu, initTrDepth, m_absPartIdxTURelCU, fencYuv, predYuv, resiYuv, dist);
+
+ if (cu->getSlice()->getPPS()->getUseTransformSkip())
+ {
+ m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
+ }
+
+ uint32_t bits = xGetIntraBitsQT(cu, initTrDepth, m_absPartIdxTURelCU, false, true);
+ uint64_t cost = m_rdCost->calcRdCost(dist, bits);
+
+ //----- compare -----
+ if (cost < bestCost)
+ {
+ bestCost = cost;
+ bestDist = dist;
+ bestMode = modeList[mode];
+ xSetIntraResultChromaQT(cu, initTrDepth, m_absPartIdxTURelCU, reconYuv);
+ ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U) + m_partOffset, m_absPartIdxStep * sizeof(UChar));
+ ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V) + m_partOffset, m_absPartIdxStep * sizeof(UChar));
+ ::memcpy(m_qtTempTransformSkipFlag[1], cu->getTransformSkip(TEXT_CHROMA_U) + m_partOffset, m_absPartIdxStep * sizeof(UChar));
+ ::memcpy(m_qtTempTransformSkipFlag[2], cu->getTransformSkip(TEXT_CHROMA_V) + m_partOffset, m_absPartIdxStep * sizeof(UChar));
+ }
}
- uint32_t bits = xGetIntraBitsQT(cu, 0, 0, false, true);
- uint64_t cost = m_rdCost->calcRdCost(dist, bits);
-
- //----- compare -----
- if (cost < bestCost)
+ if (!isLastSection())
{
- bestCost = cost;
- bestDist = dist;
- bestMode = modeList[mode];
- uint32_t qpn = cu->getPic()->getNumPartInCU() >> (depth << 1);
- xSetIntraResultChromaQT(cu, 0, 0, reconYuv);
- ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U), qpn * sizeof(UChar));
- ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V), qpn * sizeof(UChar));
- ::memcpy(m_qtTempTransformSkipFlag[1], cu->getTransformSkip(TEXT_CHROMA_U), qpn * sizeof(UChar));
- ::memcpy(m_qtTempTransformSkipFlag[2], cu->getTransformSkip(TEXT_CHROMA_V), qpn * sizeof(UChar));
+ uint32_t compWidth = (cu->getWidth(0) >> m_hChromaShift) >> initTrDepth;
+ uint32_t compHeight = (cu->getHeight(0) >> m_vChromaShift) >> initTrDepth;
+ uint32_t zorder = cu->getZorderIdxInCU() + m_partOffset;
+ Pel* dst = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
+ uint32_t dststride = cu->getPic()->getPicYuvRec()->getCStride();
+ Pel* src = reconYuv->getCbAddr(m_partOffset);
+ uint32_t srcstride = reconYuv->getCStride();
+
+ primitives.blockcpy_pp(compWidth, compHeight, dst, dststride, src, srcstride);
+
+ dst = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
+ src = reconYuv->getCrAddr(m_partOffset);
+ primitives.blockcpy_pp(compWidth, compHeight, dst, dststride, src, srcstride);
+ }
+
+ //----- set data -----
+ ::memcpy(cu->getCbf(TEXT_CHROMA_U) + m_partOffset, m_qtTempCbf[1], m_absPartIdxStep * sizeof(UChar));
+ ::memcpy(cu->getCbf(TEXT_CHROMA_V) + m_partOffset, m_qtTempCbf[2], m_absPartIdxStep * sizeof(UChar));
+ ::memcpy(cu->getTransformSkip(TEXT_CHROMA_U) + m_partOffset, m_qtTempTransformSkipFlag[1], m_absPartIdxStep * sizeof(UChar));
+ ::memcpy(cu->getTransformSkip(TEXT_CHROMA_V) + m_partOffset, m_qtTempTransformSkipFlag[2], m_absPartIdxStep * sizeof(UChar));
+ cu->setChromIntraDirSubParts(bestMode, m_partOffset, depth + initTrDepth);
+ cu->m_totalDistortion += bestDist - preCalcDistC;
+
+ } while(isNextSection());
+
+ //----- propagate combined chroma CBFs to every partition -----
+ if (initTrDepth != 0)
+ { // set Cbf for all blocks
+ uint32_t uiCombCbfU = 0;
+ uint32_t uiCombCbfV = 0;
+ uint32_t uiPartIdx = 0;
+ for (uint32_t uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += m_absPartIdxStep)
+ {
+ uiCombCbfU |= cu->getCbf(uiPartIdx, TEXT_CHROMA_U, 1);
+ uiCombCbfV |= cu->getCbf(uiPartIdx, TEXT_CHROMA_V, 1);
+ }
+ for (uint32_t uiOffs = 0; uiOffs < 4 * m_absPartIdxStep; uiOffs++)
+ {
+ cu->getCbf( TEXT_CHROMA_U )[ uiOffs ] |= uiCombCbfU;
+ cu->getCbf( TEXT_CHROMA_V )[ uiOffs ] |= uiCombCbfV;
}
}
- //----- set data -----
- uint32_t qpn = cu->getPic()->getNumPartInCU() >> (depth << 1);
- ::memcpy(cu->getCbf(TEXT_CHROMA_U), m_qtTempCbf[1], qpn * sizeof(UChar));
- ::memcpy(cu->getCbf(TEXT_CHROMA_V), m_qtTempCbf[2], qpn * sizeof(UChar));
- ::memcpy(cu->getTransformSkip(TEXT_CHROMA_U), m_qtTempTransformSkipFlag[1], qpn * sizeof(UChar));
- ::memcpy(cu->getTransformSkip(TEXT_CHROMA_V), m_qtTempTransformSkipFlag[2], qpn * sizeof(UChar));
- cu->setChromIntraDirSubParts(bestMode, 0, depth);
- cu->m_totalDistortion += bestDist - preCalcDistC;
-
//----- restore context models -----
m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
}
@@ -3085,10 +3171,11 @@
outResiYuv->clear();
predYuv->copyToPartYuv(outReconYuv, 0);
-
+ //Luma
int part = partitionFromSizes(width, height);
distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
- part = partitionFromSizes(width >> 1, height >> 1);
+ //Chroma
+ part = partitionFromSizes(width >> m_hChromaShift, height >> m_vChromaShift);
distortion += m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
distortion += m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
@@ -3208,7 +3295,7 @@
// update with clipped distortion and cost (qp estimation loop uses unclipped values)
int part = partitionFromSizes(width, height);
bdist = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
- part = partitionFromSizes(width >> 1, height >> 1);
+ part = partitionFromSizes(width >> cu->getHorzChromaShift(), height >> cu->getVertChromaShift());
bdist += m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
bdist += m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
bcost = m_rdCost->calcRdCost(bdist, bestBits);
@@ -3311,7 +3398,7 @@
cu->setTransformSkipSubParts(0, TEXT_CHROMA_V, absPartIdx, cu->getDepth(0) + trModeC);
}
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
m_trQuant->selectLambda(TEXT_LUMA);
absSumY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
@@ -3322,7 +3409,7 @@
if (bCodeChroma)
{
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
m_trQuant->selectLambda(TEXT_CHROMA);
@@ -3330,7 +3417,7 @@
trWidthC, trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosU, false, curuseRDOQ);
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
absSumV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
trWidthC, trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosV, false, curuseRDOQ);
@@ -3342,7 +3429,7 @@
{
int16_t *curResiY = resiYuv->getLumaAddr(absTUPartIdx);
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];
assert(scalingListType < 6);
@@ -3362,7 +3449,7 @@
int16_t *pcResiCurrU = resiYuv->getCbAddr(absTUPartIdxC);
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];
assert(scalingListType < 6);
@@ -3378,7 +3465,7 @@
{
int16_t *curResiV = resiYuv->getCrAddr(absTUPartIdxC);
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];
assert(scalingListType < 6);
@@ -3453,6 +3540,7 @@
assert(cu->getDepth(0) == cu->getDepth(absPartIdx));
const uint32_t trMode = depth - cu->getDepth(0);
const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
+ uint32_t trSizeCLog2 = g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >> m_hChromaShift) >> depth] + 2;;
bool bSplitFlag = ((cu->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) && cu->getPredictionMode(absPartIdx) == MODE_INTER && (cu->getPartitionSize(absPartIdx) != SIZE_2Nx2N));
bool bCheckFull;
@@ -3465,12 +3553,11 @@
bool bCodeChroma = true;
uint32_t trModeC = trMode;
- uint32_t trSizeCLog2 = trSizeLog2 - 1;
- if (trSizeLog2 == 2)
+ if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
{
trSizeCLog2++;
trModeC--;
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
+ uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
bCodeChroma = ((absPartIdx % qpdiv) == 0);
}
@@ -3490,8 +3577,8 @@
const uint32_t numCoeffPerAbsPartIdxIncrement = cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
TCoeff *coeffCurY = m_qtTempCoeffY[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx);
- TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
- TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
+ TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
+ TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
int trWidth = 0, trHeight = 0, trWidthC = 0, trHeightC = 0;
uint32_t absTUPartIdxC = absPartIdx;
@@ -3520,7 +3607,7 @@
m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trWidth, trHeight, TEXT_LUMA);
}
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
m_trQuant->selectLambda(TEXT_LUMA);
absSumY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
@@ -3534,17 +3621,17 @@
{
m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trWidthC, trHeightC, TEXT_CHROMA);
}
-
+ //Cb transform
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
m_trQuant->selectLambda(TEXT_CHROMA);
absSumU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU,
trWidthC, trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosU, false, curuseRDOQ);
-
+ //Cr transform
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
absSumV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
trWidthC, trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosV, false, curuseRDOQ);
@@ -3586,7 +3673,7 @@
{
int16_t *curResiY = m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx);
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];
assert(scalingListType < 6);
@@ -3658,16 +3745,15 @@
int16_t *pcResiCurrU = m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];
assert(scalingListType < 6);
- assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, pcResiCurrU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC, scalingListType, false, lastPosU);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, pcResiCurrU, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurU, trWidthC, trHeightC, scalingListType, false, lastPosU);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth,
m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC),
- MAX_CU_SIZE / 2);
+ m_qtTempTComYuv[qtlayer].m_cwidth);
const uint32_t nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
if (cu->isLosslessCoded(0))
@@ -3710,10 +3796,10 @@
if (!absSumU)
{
int16_t *ptr = m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);
- assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
+ const uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;
assert(trWidthC == trHeightC);
- primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, MAX_CU_SIZE / 2, 0);
+ primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, stride, 0);
}
distV = m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, m_tempPel, trWidthC));
@@ -3725,16 +3811,15 @@
{
int16_t *curResiV = m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];
assert(scalingListType < 6);
- assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, MAX_CU_SIZE / 2, coeffCurV, trWidthC, trHeightC, scalingListType, false, lastPosV);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurV, trWidthC, trHeightC, scalingListType, false, lastPosV);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth,
m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC),
- MAX_CU_SIZE / 2);
+ m_qtTempTComYuv[qtlayer].m_cwidth);
const uint32_t nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
if (cu->isLosslessCoded(0))
@@ -3777,10 +3862,10 @@
if (!absSumV)
{
int16_t *ptr = m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);
- assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
+ const uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;
assert(trWidthC == trHeightC);
- primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, MAX_CU_SIZE / 2, 0);
+ primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, stride, 0);
}
}
cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
@@ -3817,7 +3902,7 @@
m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trWidth, trHeight, TEXT_LUMA);
}
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
m_trQuant->selectLambda(TEXT_LUMA);
absSumTransformSkipY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
@@ -3831,7 +3916,7 @@
m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trWidth, trHeight, depth, TEXT_LUMA);
const uint32_t skipSingleBitsY = m_entropyCoder->getNumberOfWrittenBits();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];
assert(scalingListType < 6);
@@ -3874,7 +3959,7 @@
int16_t *curResiU = m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);
int16_t *curResiV = m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);
- assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
+ uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;
TCoeff bestCoeffU[32 * 32], bestCoeffV[32 * 32];
memcpy(bestCoeffU, coeffCurU, sizeof(TCoeff) * numSamplesChroma);
@@ -3883,8 +3968,8 @@
int16_t bestResiU[32 * 32], bestResiV[32 * 32];
for (int i = 0; i < trHeightC; ++i)
{
- memcpy(&bestResiU[i * trWidthC], curResiU + i * (MAX_CU_SIZE / 2), sizeof(int16_t) * trWidthC);
- memcpy(&bestResiV[i * trWidthC], curResiV + i * (MAX_CU_SIZE / 2), sizeof(int16_t) * trWidthC);
+ memcpy(&bestResiU[i * trWidthC], curResiU + i * stride, sizeof(int16_t) * trWidthC);
+ memcpy(&bestResiV[i * trWidthC], curResiV + i * stride, sizeof(int16_t) * trWidthC);
}
m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);
@@ -3898,13 +3983,13 @@
}
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
m_trQuant->selectLambda(TEXT_CHROMA);
absSumTransformSkipU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU,
trWidthC, trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosTransformSkipU, true, curuseRDOQ);
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
absSumTransformSkipV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
trWidthC, trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosTransformSkipV, true, curuseRDOQ);
@@ -3922,17 +4007,15 @@
singleBitsU = m_entropyCoder->getNumberOfWrittenBits();
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];
assert(scalingListType < 6);
- assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
-
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipU);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiU, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurU, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipU);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth,
m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC),
- MAX_CU_SIZE / 2);
+ m_qtTempTComYuv[qtlayer].m_cwidth);
nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsU);
}
@@ -3944,7 +4027,7 @@
memcpy(coeffCurU, bestCoeffU, sizeof(TCoeff) * numSamplesChroma);
for (int i = 0; i < trHeightC; ++i)
{
- memcpy(curResiU + i * (MAX_CU_SIZE / 2), &bestResiU[i * trWidthC], sizeof(int16_t) * trWidthC);
+ memcpy(curResiU + i * stride, &bestResiU[i * trWidthC], sizeof(int16_t) * trWidthC);
}
}
else
@@ -3961,17 +4044,15 @@
singleBitsV = m_entropyCoder->getNumberOfWrittenBits() - singleBitsU;
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());
int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];
assert(scalingListType < 6);
- assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
-
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, MAX_CU_SIZE / 2, coeffCurV, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipV);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurV, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipV);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth,
m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC),
- MAX_CU_SIZE / 2);
+ m_qtTempTComYuv[qtlayer].m_cwidth);
nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsV);
}
@@ -3983,7 +4064,7 @@
memcpy(coeffCurV, bestCoeffV, sizeof(TCoeff) * numSamplesChroma);
for (int i = 0; i < trHeightC; ++i)
{
- memcpy(curResiV + i * (MAX_CU_SIZE / 2), &bestResiV[i * trWidthC], sizeof(int16_t) * trWidthC);
+ memcpy(curResiV + i * stride, &bestResiV[i * trWidthC], sizeof(int16_t) * trWidthC);
}
}
else
@@ -4115,6 +4196,7 @@
const uint32_t trMode = cu->getTransformIdx(absPartIdx);
const bool bSubdiv = curTrMode != trMode;
const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
+ uint32_t trSizeCLog2 = g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >> m_hChromaShift) >> depth] + 2;
if (bSubdivAndCbf && trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() && trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
{
@@ -4145,21 +4227,20 @@
if (!bSubdiv)
{
+ //Luma
const uint32_t numCoeffPerAbsPartIdxIncrement = cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
- //assert( 16 == uiNumCoeffPerAbsPartIdxIncrement ); // check
const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
TCoeff *coeffCurY = m_qtTempCoeffY[qtlayer] + numCoeffPerAbsPartIdxIncrement * absPartIdx;
- TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
- TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
+
+ //Chroma
+ TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
+ TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
bool bCodeChroma = true;
- uint32_t trModeC = trMode;
- uint32_t trSizeCLog2 = trSizeLog2 - 1;
- if (trSizeLog2 == 2)
+ if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
{
trSizeCLog2++;
- trModeC--;
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
+ uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
bCodeChroma = ((absPartIdx % qpdiv) == 0);
}
@@ -4171,21 +4252,18 @@
{
if (ttype == TEXT_LUMA && cu->getCbf(absPartIdx, TEXT_LUMA, trMode))
{
- int trWidth = 1 << trSizeLog2;
- int trHeight = 1 << trSizeLog2;
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trWidth, trHeight, depth, TEXT_LUMA);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, 1 << trSizeLog2, 1 << trSizeLog2, depth, TEXT_LUMA);
}
+
if (bCodeChroma)
{
- int trWidth = 1 << trSizeCLog2;
- int trHeight = 1 << trSizeCLog2;
if (ttype == TEXT_CHROMA_U && cu->getCbf(absPartIdx, TEXT_CHROMA_U, trMode))
{
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trWidth, trHeight, depth, TEXT_CHROMA_U);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2, depth, TEXT_CHROMA_U);
}
if (ttype == TEXT_CHROMA_V && cu->getCbf(absPartIdx, TEXT_CHROMA_V, trMode))
{
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trWidth, trHeight, depth, TEXT_CHROMA_V);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2, depth, TEXT_CHROMA_V);
}
}
}
@@ -4211,13 +4289,13 @@
if (curTrMode == trMode)
{
- const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
+ const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
+ uint32_t trSizeCLog2 = g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >> cu->getHorzChromaShift()) >> depth] + 2;;
const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
bool bCodeChroma = true;
uint32_t trModeC = trMode;
- uint32_t trSizeCLog2 = trSizeLog2 - 1;
- if (trSizeLog2 == 2)
+ if((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
{
trSizeCLog2++;
trModeC--;
@@ -4246,10 +4324,10 @@
if (bCodeChroma)
{
uint32_t uiNumCoeffC = (1 << (trSizeCLog2 << 1));
- TCoeff* pcCoeffSrcU = m_qtTempCoeffCb[qtlayer] + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
- TCoeff* pcCoeffSrcV = m_qtTempCoeffCr[qtlayer] + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
- TCoeff* pcCoeffDstU = cu->getCoeffCb() + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
- TCoeff* pcCoeffDstV = cu->getCoeffCr() + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
+ TCoeff* pcCoeffSrcU = m_qtTempCoeffCb[qtlayer] + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
+ TCoeff* pcCoeffSrcV = m_qtTempCoeffCr[qtlayer] + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
+ TCoeff* pcCoeffDstU = cu->getCoeffCb() + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
+ TCoeff* pcCoeffDstV = cu->getCoeffCr() + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
::memcpy(pcCoeffDstU, pcCoeffSrcU, sizeof(TCoeff) * uiNumCoeffC);
::memcpy(pcCoeffDstV, pcCoeffSrcV, sizeof(TCoeff) * uiNumCoeffC);
}
diff -r 4811da38078c -r f7d21da102ac source/common/TShortYUV.h
--- a/source/common/TShortYUV.h Mon Jan 06 23:15:58 2014 -0600
+++ b/source/common/TShortYUV.h Tue Jan 07 16:44:39 2014 +0530
@@ -87,9 +87,9 @@
// Access starting position of YUV partition unit buffer
int16_t* getLumaAddr(unsigned int partUnitIdx) { return m_bufY + getAddrOffset(partUnitIdx, m_width); }
- int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb + (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }
+ int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb + (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }
- int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr + (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }
+ int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr + (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }
// Access starting position of YUV transform unit buffer
int16_t* getLumaAddr(unsigned int partIdx, unsigned int size) { return m_bufY + getAddrOffset(partIdx, size, m_width); }
diff -r 4811da38078c -r f7d21da102ac source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp Mon Jan 06 23:15:58 2014 -0600
+++ b/source/common/ipfilter.cpp Tue Jan 07 16:44:39 2014 +0530
@@ -449,74 +449,108 @@
namespace x265 {
// x265 private namespace
-#define CHROMA(W, H) \
+#define CHROMA_420(W, H) \
p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
- p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
- p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
- p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
+ p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
+ p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
+ p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
+#define CHROMA_444(W, H) \
+ p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
+ p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
+ p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
+ p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
+ p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
+ p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
+
#define LUMA(W, H) \
p.luma_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<8, W, H>; \
p.luma_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<8, W, H>; \
- p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \
- p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \
- p.luma_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<8, W, H>; \
- p.luma_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<8, W, H>; \
+ p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \
+ p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \
+ p.luma_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<8, W, H>; \
+ p.luma_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<8, W, H>; \
p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_hv_pp_c<8, W, H>;
void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)
{
LUMA(4, 4);
LUMA(8, 8);
- CHROMA(4, 4);
+ CHROMA_420(4, 4);
LUMA(4, 8);
- CHROMA(2, 4);
+ CHROMA_420(2, 4);
LUMA(8, 4);
- CHROMA(4, 2);
+ CHROMA_420(4, 2);
LUMA(16, 16);
- CHROMA(8, 8);
+ CHROMA_420(8, 8);
LUMA(16, 8);
- CHROMA(8, 4);
+ CHROMA_420(8, 4);
LUMA(8, 16);
- CHROMA(4, 8);
+ CHROMA_420(4, 8);
LUMA(16, 12);
- CHROMA(8, 6);
+ CHROMA_420(8, 6);
LUMA(12, 16);
- CHROMA(6, 8);
+ CHROMA_420(6, 8);
LUMA(16, 4);
- CHROMA(8, 2);
+ CHROMA_420(8, 2);
LUMA(4, 16);
- CHROMA(2, 8);
+ CHROMA_420(2, 8);
LUMA(32, 32);
- CHROMA(16, 16);
+ CHROMA_420(16, 16);
LUMA(32, 16);
- CHROMA(16, 8);
+ CHROMA_420(16, 8);
LUMA(16, 32);
- CHROMA(8, 16);
+ CHROMA_420(8, 16);
LUMA(32, 24);
- CHROMA(16, 12);
+ CHROMA_420(16, 12);
LUMA(24, 32);
- CHROMA(12, 16);
+ CHROMA_420(12, 16);
LUMA(32, 8);
- CHROMA(16, 4);
+ CHROMA_420(16, 4);
LUMA(8, 32);
- CHROMA(4, 16);
+ CHROMA_420(4, 16);
LUMA(64, 64);
- CHROMA(32, 32);
+ CHROMA_420(32, 32);
LUMA(64, 32);
- CHROMA(32, 16);
+ CHROMA_420(32, 16);
LUMA(32, 64);
- CHROMA(16, 32);
+ CHROMA_420(16, 32);
LUMA(64, 48);
- CHROMA(32, 24);
+ CHROMA_420(32, 24);
LUMA(48, 64);
- CHROMA(24, 32);
+ CHROMA_420(24, 32);
LUMA(64, 16);
- CHROMA(32, 8);
+ CHROMA_420(32, 8);
LUMA(16, 64);
- CHROMA(8, 32);
+ CHROMA_420(8, 32);
+
+ CHROMA_444(4, 4);
+ CHROMA_444(8, 8);
+ CHROMA_444(4, 8);
+ CHROMA_444(8, 4);
+ CHROMA_444(16, 16);
+ CHROMA_444(16, 8);
+ CHROMA_444(8, 16);
+ CHROMA_444(16, 12);
+ CHROMA_444(12, 16);
+ CHROMA_444(16, 4);
+ CHROMA_444(4, 16);
+ CHROMA_444(32, 32);
+ CHROMA_444(32, 16);
+ CHROMA_444(16, 32);
+ CHROMA_444(32, 24);
+ CHROMA_444(24, 32);
+ CHROMA_444(32, 8);
+ CHROMA_444(8, 32);
+ CHROMA_444(64, 64);
+ CHROMA_444(64, 32);
+ CHROMA_444(32, 64);
+ CHROMA_444(64, 48);
+ CHROMA_444(48, 64);
+ CHROMA_444(64, 16);
+ CHROMA_444(16, 64);
p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_ps_c<8>;
p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_ps_c<4>;
@@ -525,7 +559,9 @@
p.chroma_vsp = filterVertical_sp_c<4>;
p.luma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;
- p.chroma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
+
+ p.chroma_p2s[X265_CSP_I444] = filterConvertPelToShort_c<MAX_CU_SIZE>;
+ p.chroma_p2s[X265_CSP_I420] = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
p.extendRowBorder = extendCURowColBorder;
}
diff -r 4811da38078c -r f7d21da102ac source/common/pixel.cpp
--- a/source/common/pixel.cpp Mon Jan 06 23:15:58 2014 -0600
+++ b/source/common/pixel.cpp Tue Jan 07 16:44:39 2014 +0530
@@ -805,6 +805,27 @@
namespace x265 {
// x265 private namespace
+#define CHROMA_420(W, H) \
+ p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
+ p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
+ p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
+ p.chroma[X265_CSP_I420].sub_ps [CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
+ p.chroma[X265_CSP_I420].add_ps [CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+
+#define CHROMA_444(W, H) \
+ p.chroma[X265_CSP_I444].copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
+ p.chroma[X265_CSP_I444].copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
+ p.chroma[X265_CSP_I444].copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
+ p.chroma[X265_CSP_I444].sub_ps [LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
+ p.chroma[X265_CSP_I444].add_ps [LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+
+#define LUMA(W, H) \
+ p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
+ p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
+ p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
+ p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
+ p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+
/* It should initialize entries for pixel functions defined in this file. */
void Setup_C_PixelPrimitives(EncoderPrimitives &p)
{
@@ -840,69 +861,81 @@
p.satd[LUMA_64x16] = satd8<64, 16>;
p.satd[LUMA_16x64] = satd8<16, 64>;
-#define CHROMA(W, H) \
- p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
- p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
- p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
- p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
- p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
-
-#define LUMA(W, H) \
- p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
- p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
- p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
- p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
- p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
-
LUMA(4, 4);
LUMA(8, 8);
- CHROMA(4, 4);
+ CHROMA_420(4, 4);
LUMA(4, 8);
- CHROMA(2, 4);
+ CHROMA_420(2, 4);
LUMA(8, 4);
- CHROMA(4, 2);
+ CHROMA_420(4, 2);
LUMA(16, 16);
- CHROMA(8, 8);
+ CHROMA_420(8, 8);
LUMA(16, 8);
- CHROMA(8, 4);
+ CHROMA_420(8, 4);
LUMA(8, 16);
- CHROMA(4, 8);
+ CHROMA_420(4, 8);
LUMA(16, 12);
- CHROMA(8, 6);
+ CHROMA_420(8, 6);
LUMA(12, 16);
- CHROMA(6, 8);
+ CHROMA_420(6, 8);
LUMA(16, 4);
- CHROMA(8, 2);
+ CHROMA_420(8, 2);
LUMA(4, 16);
- CHROMA(2, 8);
+ CHROMA_420(2, 8);
LUMA(32, 32);
- CHROMA(16, 16);
+ CHROMA_420(16, 16);
LUMA(32, 16);
- CHROMA(16, 8);
+ CHROMA_420(16, 8);
LUMA(16, 32);
- CHROMA(8, 16);
+ CHROMA_420(8, 16);
LUMA(32, 24);
- CHROMA(16, 12);
+ CHROMA_420(16, 12);
LUMA(24, 32);
- CHROMA(12, 16);
+ CHROMA_420(12, 16);
LUMA(32, 8);
- CHROMA(16, 4);
+ CHROMA_420(16, 4);
LUMA(8, 32);
- CHROMA(4, 16);
+ CHROMA_420(4, 16);
LUMA(64, 64);
- CHROMA(32, 32);
+ CHROMA_420(32, 32);
LUMA(64, 32);
- CHROMA(32, 16);
+ CHROMA_420(32, 16);
LUMA(32, 64);
- CHROMA(16, 32);
+ CHROMA_420(16, 32);
LUMA(64, 48);
- CHROMA(32, 24);
+ CHROMA_420(32, 24);
LUMA(48, 64);
- CHROMA(24, 32);
+ CHROMA_420(24, 32);
LUMA(64, 16);
- CHROMA(32, 8);
+ CHROMA_420(32, 8);
LUMA(16, 64);
- CHROMA(8, 32);
+ CHROMA_420(8, 32);
+
+ CHROMA_444(4, 4);
+ CHROMA_444(8, 8);
+ CHROMA_444(4, 8);
+ CHROMA_444(8, 4);
+ CHROMA_444(16, 16);
+ CHROMA_444(16, 8);
+ CHROMA_444(8, 16);
+ CHROMA_444(16, 12);
+ CHROMA_444(12, 16);
+ CHROMA_444(16, 4);
+ CHROMA_444(4, 16);
+ CHROMA_444(32, 32);
+ CHROMA_444(32, 16);
+ CHROMA_444(16, 32);
+ CHROMA_444(32, 24);
+ CHROMA_444(24, 32);
+ CHROMA_444(32, 8);
+ CHROMA_444(8, 32);
+ CHROMA_444(64, 64);
+ CHROMA_444(64, 32);
+ CHROMA_444(32, 64);
+ CHROMA_444(64, 48);
+ CHROMA_444(48, 64);
+ CHROMA_444(64, 16);
+ CHROMA_444(16, 64);
SET_FUNC_PRIMITIVE_TABLE_C(sse_pp, sse, pixelcmp_t, pixel, pixel)
SET_FUNC_PRIMITIVE_TABLE_C(sse_sp, sse, pixelcmp_sp_t, int16_t, pixel)
diff -r 4811da38078c -r f7d21da102ac source/common/primitives.h
--- a/source/common/primitives.h Mon Jan 06 23:15:58 2014 -0600
+++ b/source/common/primitives.h Tue Jan 07 16:44:39 2014 +0530
@@ -75,7 +75,7 @@
// 4:2:0 chroma partition sizes. These enums are just a convenience for indexing into the
// chroma primitive arrays when instantiating templates. The function tables should always
// be indexed by the luma partition enum
-enum Chroma420Partions
+enum Chroma420Partitions
{
CHROMA_2x2, // never used by HEVC
CHROMA_4x4, CHROMA_4x2, CHROMA_2x4,
@@ -240,7 +240,7 @@
ipfilter_ps_t ipfilter_ps[NUM_IPFILTER_P_S];
ipfilter_ss_t ipfilter_ss[NUM_IPFILTER_S_S];
filter_p2s_t luma_p2s;
- filter_p2s_t chroma_p2s;
+ filter_p2s_t chroma_p2s[NUM_CHROMA_PARTITIONS];
ipfilter_sp_t chroma_vsp;
weightp_sp_t weight_sp;
diff -r 4811da38078c -r f7d21da102ac source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Mon Jan 06 23:15:58 2014 -0600
+++ b/source/encoder/encoder.cpp Tue Jan 07 16:44:39 2014 +0530
@@ -1288,6 +1288,8 @@
bEnableRDOQTS = 0;
}
+ m_csp = _param->internalCsp;
+
//====== Coding Tools ========
uint32_t tuQTMaxLog2Size = g_convertToBit[_param->maxCUSize] + 2 - 1;
diff -r 4811da38078c -r f7d21da102ac source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Mon Jan 06 23:15:58 2014 -0600
+++ b/source/encoder/frameencoder.cpp Tue Jan 07 16:44:39 2014 +0530
@@ -330,11 +330,11 @@
// instead we weight the distortion of chroma.
int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();
int qpc = Clip3(0, 70, qp + chromaQPOffset);
- double cbWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into account of the chroma qp mapping and chroma qp Offset
+ double cbWeight = pow(2.0, (qp - g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // accounts for the chroma QP mapping and the chroma QP offset
chromaQPOffset = slice->getPPS()->getChromaCrQpOffset() + slice->getSliceQpDeltaCr();
qpc = Clip3(0, 70, qp + chromaQPOffset);
- double crWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into account of the chroma qp mapping and chroma qp Offset
+ double crWeight = pow(2.0, (qp - g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // accounts for the chroma QP mapping and the chroma QP offset
double chromaLambda = lambda / crWeight;
m_rows[row].m_search.setQPLambda(qp, lambda, chromaLambda);
@@ -369,10 +369,10 @@
int qpc;
int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();
qpc = Clip3(0, 70, qp + chromaQPOffset);
- double cbWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into account of the chroma qp mapping and chroma qp Offset
+ double cbWeight = pow(2.0, (qp - g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // accounts for the chroma QP mapping and the chroma QP offset
chromaQPOffset = slice->getPPS()->getChromaCrQpOffset() + slice->getSliceQpDeltaCr();
qpc = Clip3(0, 70, qp + chromaQPOffset);
- double crWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into account of the chroma qp mapping and chroma qp Offset
+ double crWeight = pow(2.0, (qp - g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // accounts for the chroma QP mapping and the chroma QP offset
double chromaLambda = lambda / crWeight;
// NOTE: set SAO lambda every Frame
diff -r 4811da38078c -r f7d21da102ac source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Mon Jan 06 23:15:58 2014 -0600
+++ b/source/encoder/framefilter.cpp Tue Jan 07 16:44:39 2014 +0530
@@ -64,6 +64,9 @@
m_cfg = top;
m_numRows = numRows;
+ m_hChromaShift = CHROMA_H_SHIFT(m_cfg->getColorFormat());
+ m_vChromaShift = CHROMA_V_SHIFT(m_cfg->getColorFormat());
+
// NOTE: for sao only, I write this code because I want to exact match with HM's bug bitstream
m_rdGoOnSbacCoderRow0 = rdGoOnSbacCoder;
@@ -77,7 +80,7 @@
m_sao.setSaoLcuBoundary(top->param.saoLcuBoundary);
m_sao.setSaoLcuBasedOptimization(top->param.saoLcuBasedOptimization);
m_sao.setMaxNumOffsetsPerPic(top->getMaxNumOffsetsPerPic());
- m_sao.create(top->param.sourceWidth, top->param.sourceHeight, g_maxCUWidth, g_maxCUHeight);
+ m_sao.create(top->param.sourceWidth, top->param.sourceHeight, g_maxCUWidth, g_maxCUHeight, m_cfg->getColorFormat());
m_sao.createEncBuffer();
}
@@ -222,8 +225,8 @@
// Border extend Left and Right
primitives.extendRowBorder(recon->getLumaAddr(lineStartCUAddr), recon->getStride(), recon->getWidth(), realH, recon->getLumaMarginX());
- primitives.extendRowBorder(recon->getCbAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> 1, realH >> 1, recon->getChromaMarginX());
- primitives.extendRowBorder(recon->getCrAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> 1, realH >> 1, recon->getChromaMarginX());
+ primitives.extendRowBorder(recon->getCbAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> m_hChromaShift, realH >> m_vChromaShift, recon->getChromaMarginX());
+ primitives.extendRowBorder(recon->getCrAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> m_hChromaShift, realH >> m_vChromaShift, recon->getChromaMarginX());
// Border extend Top
if (row == 0)
@@ -252,8 +255,8 @@
const intptr_t stride = recon->getStride();
const intptr_t strideC = recon->getCStride();
pixel *pixY = recon->getLumaAddr(lineStartCUAddr) - recon->getLumaMarginX() + (realH - 1) * stride;
- pixel *pixU = recon->getCbAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> 1) - 1) * strideC;
- pixel *pixV = recon->getCrAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> 1) - 1) * strideC;
+ pixel *pixU = recon->getCbAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> m_vChromaShift) - 1) * strideC;
+ pixel *pixV = recon->getCrAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> m_vChromaShift) - 1) * strideC;
for (int y = 0; y < recon->getLumaMarginY(); y++)
{
@@ -290,8 +293,8 @@
uint64_t ssdY = computeSSD(orig->getLumaAddr(cuAddr), recon->getLumaAddr(cuAddr), stride, width, height);
- height >>= 1;
- width >>= 1;
+ height >>= m_vChromaShift;
+ width >>= m_hChromaShift;
stride = recon->getCStride();
uint64_t ssdU = computeSSD(orig->getCbAddr(cuAddr), recon->getCbAddr(cuAddr), stride, width, height);
@@ -337,8 +340,8 @@
updateMD5Plane(m_pic->m_state[0], recon->getLumaAddr(cuAddr), width, height, stride);
- width >>= 1;
- height >>= 1;
+ width >>= m_hChromaShift;
+ height >>= m_vChromaShift;
stride = recon->getCStride();
updateMD5Plane(m_pic->m_state[1], recon->getCbAddr(cuAddr), width, height, stride);
@@ -356,8 +359,8 @@
}
updateCRC(recon->getLumaAddr(cuAddr), m_pic->m_crc[0], height, width, stride);
- width >>= 1;
- height >>= 1;
+ width >>= m_hChromaShift;
+ height >>= m_vChromaShift;
stride = recon->getCStride();
updateCRC(recon->getCbAddr(cuAddr), m_pic->m_crc[1], height, width, stride);
@@ -374,10 +377,10 @@
m_pic->m_checksum[0] = m_pic->m_checksum[1] = m_pic->m_checksum[2] = 0;
}
updateChecksum(recon->getLumaAddr(), m_pic->m_checksum[0], height, width, stride, row, cuHeight);
- width >>= 1;
- height >>= 1;
+ width >>= m_hChromaShift;
+ height >>= m_vChromaShift;
stride = recon->getCStride();
- cuHeight >>= 1;
+ cuHeight >>= m_vChromaShift;
updateChecksum(recon->getCbAddr(), m_pic->m_checksum[1], height, width, stride, row, cuHeight);
updateChecksum(recon->getCrAddr(), m_pic->m_checksum[2], height, width, stride, row, cuHeight);
}
diff -r 4811da38078c -r f7d21da102ac source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Mon Jan 06 23:15:58 2014 -0600
+++ b/source/encoder/framefilter.h Tue Jan 07 16:44:39 2014 +0530
@@ -59,6 +59,9 @@
TEncCfg* m_cfg;
TComPic* m_pic;
+ int m_hChromaShift;
+ int m_vChromaShift;
+
public:
TComLoopFilter m_loopFilter;
More information about the x265-devel mailing list