[x265] [PATCH] Modify TEncSearch structure to support multiple color space formats
Steve Borho
steve at borho.org
Wed Jan 8 01:08:28 CET 2014
On Tue, Jan 7, 2014 at 5:16 AM, <ashok at multicorewareinc.com> wrote:
> # HG changeset patch
> # User ashok at multicorewareinc.com
> # Date 1389093279 -19800
> # Tue Jan 07 16:44:39 2014 +0530
> # Node ID f7d21da102acf8d88be3f6ea6b6db5dc12134cdb
> # Parent 4811da38078cd02434f7da1dcc1b0af4dcf5adb8
> Modify TEncSearch structure to support multiple color space formats
>
Some parts of this patch look redundant with some of the earlier patches.
It's an impressive series, ignoring the whitespace and style problems.
Configuring the 4:4:4 chroma primitives needs to happen
in x265_setup_primitives(), in the same place where it configures the other
function pointer copies. That way you get the ASM-optimized functions if they
were configured.
>
> diff -r 4811da38078c -r f7d21da102ac source/Lib/TLibCommon/CommonDef.h
> --- a/source/Lib/TLibCommon/CommonDef.h Mon Jan 06 23:15:58 2014 -0600
> +++ b/source/Lib/TLibCommon/CommonDef.h Tue Jan 07 16:44:39 2014 +0530
> @@ -88,6 +88,9 @@
> #define MLS_GRP_NUM 64 ///< G644 : Max number of
> coefficient groups, max(16, 64)
> #define MLS_CG_SIZE 4 ///< G644 : Coefficient group size
> of 4x4
>
> +#define MLS_CG_LOG2_WIDTH 2
> +#define MLS_CG_LOG2_HEIGHT 2
> +
> #define ARL_C_PRECISION 7 ///< G382: 7-bit arithmetic
> precision
> #define LEVEL_RANGE 30 ///< G382: max coefficient level
> in statistics collection
>
> diff -r 4811da38078c -r f7d21da102ac source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jan 06 23:15:58 2014
> -0600
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jan 07 16:44:39 2014
> +0530
> @@ -229,7 +229,7 @@
>
> if (bChroma)
> {
> - if (trSizeLog2 > 2)
> + if ((trSizeLog2 > 2) && !(cu->getChromaFormat() == CHROMA_444))
> {
> if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_U,
> trDepth - 1))
> m_entropyCoder->encodeQtCbf(cu, absPartIdx,
> TEXT_CHROMA_U, trDepth);
> @@ -275,7 +275,7 @@
> return;
> }
>
> - if (ttype != TEXT_LUMA && trSizeLog2 == 2)
> + if ( (ttype != TEXT_LUMA) && (trSizeLog2 == 2) &&
> !(cu->getChromaFormat() == CHROMA_444))
> {
> assert(trDepth > 0);
> trDepth--;
> @@ -288,9 +288,11 @@
> }
>
> //===== coefficients =====
> - uint32_t width = cu->getWidth(0) >> (trDepth + chroma);
> - uint32_t height = cu->getHeight(0) >> (trDepth + chroma);
> - uint32_t coeffOffset = (cu->getPic()->getMinCUWidth() *
> cu->getPic()->getMinCUHeight() * absPartIdx) >> (chroma << 1);
> + int cspx = chroma ? m_hChromaShift : 0;
> + int cspy = chroma ? m_vChromaShift : 0;
> + uint32_t width = cu->getWidth(0) >> (trDepth + cspx);
> + uint32_t height = cu->getHeight(0) >> (trDepth + cspy);
> + uint32_t coeffOffset = (cu->getPic()->getMinCUWidth() >> cspx) *
> (cu->getPic()->getMinCUHeight() >> cspy) * absPartIdx;
> uint32_t qtLayer =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> TCoeff* coeff = 0;
> switch (ttype)
> @@ -363,12 +365,23 @@
> }
> }
> }
> +
> if (bChroma)
> {
> // chroma prediction mode
> - if (absPartIdx == 0)
> + if ((cu->getPartitionSize(0) == SIZE_2Nx2N) ||
> !(cu->getChromaFormat() == CHROMA_444))
> {
> - m_entropyCoder->encodeIntraDirModeChroma(cu, 0, true);
> + if (absPartIdx == 0)
> + {
> + m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx,
> true);
> + }
> + }
> + else
> + {
> + uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
> + assert(trDepth > 0);
> + if ((absPartIdx%qtNumParts) == 0)
> + m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx,
> true);
> }
> }
> }
> @@ -475,7 +488,7 @@
> int lastPos = -1;
> cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
>
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
> m_trQuant->selectLambda(TEXT_LUMA);
>
> absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width,
> height, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
> @@ -520,7 +533,7 @@
> uint32_t fullDepth = cu->getDepth(0) + trDepth;
> uint32_t trSizeLog2 =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> fullDepth] + 2;
>
> - if (trSizeLog2 == 2)
> + if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
> {
> assert(trDepth > 0);
> trDepth--;
> @@ -534,7 +547,7 @@
>
> TextType ttype = (chromaId > 0 ? TEXT_CHROMA_V :
> TEXT_CHROMA_U);
> uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdx);
> - uint32_t width = cu->getWidth(0) >> (trDepth +
> m_hChromaShift);
> + uint32_t width = cu->getWidth(0) >> (trDepth +
> m_hChromaShift);
> uint32_t height = cu->getHeight(0) >> (trDepth +
> m_vChromaShift);
> uint32_t stride = fencYuv->getCStride();
> Pel* fenc = (chromaId > 0 ?
> fencYuv->getCrAddr(absPartIdx) : fencYuv->getCbAddr(absPartIdx));
> @@ -543,10 +556,10 @@
> Pel* recon = (chromaId > 0 ?
> predYuv->getCrAddr(absPartIdx) : predYuv->getCbAddr(absPartIdx));
>
> uint32_t qtlayer =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> - uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUWidth()
> * cu->getSlice()->getSPS()->getMaxCUHeight() >>
> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> 2;
> + uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUWidth()
> * cu->getSlice()->getSPS()->getMaxCUHeight() >>
> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> (m_hChromaShift +
> m_vChromaShift);
> TCoeff* coeff = (chromaId > 0 ? m_qtTempCoeffCr[qtlayer] :
> m_qtTempCoeffCb[qtlayer]) + numCoeffPerInc * absPartIdx;
> int16_t* reconQt = (chromaId > 0 ?
> m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx) :
> m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx));
> - assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
> + uint32_t reconQtStride = m_qtTempTComYuv[qtlayer].m_cwidth;
>
> uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> Pel* reconIPred = (chromaId > 0 ?
> cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder) :
> cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder));
> @@ -557,7 +570,7 @@
> //===== update chroma mode =====
> if (chromaPredMode == DM_CHROMA_IDX)
> {
> - chromaPredMode = cu->getLumaIntraDir(0);
> + chromaPredMode = cu->getLumaIntraDir(absPartIdx);
> }
>
> //===== init availability pattern =====
> @@ -565,11 +578,11 @@
> {
> cu->getPattern()->initPattern(cu, trDepth, absPartIdx);
>
> - cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth,
> m_predBuf, m_predBufStride, m_predBufHeight);
> + cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth,
> m_predBuf, m_predBufStride, m_predBufHeight, chromaId);
> Pel* chromaPred = (chromaId > 0 ?
> cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) :
> cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));
>
> //===== get prediction signal =====
> - predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width);
> + predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width, height, cu->getChromaFormat());
>
> // save prediction
> if (default0Save1Load2 == 1)
> @@ -612,7 +625,7 @@
> {
> curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> }
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
> m_trQuant->selectLambda(TEXT_CHROMA);
>
> @@ -639,7 +652,7 @@
> //===== reconstruction =====
> assert(((uint32_t)(size_t)residual & (width - 1)) == 0);
> assert(width <= 32);
> - primitives.calcrecon[size](pred, residual, recon, reconQt,
> reconIPred, stride, MAX_CU_SIZE / 2, reconIPredStride);
> + primitives.calcrecon[size](pred, residual, recon, reconQt,
> reconIPred, stride, reconQtStride, reconIPredStride);
>
> //===== update distortion =====
> uint32_t dist = primitives.sse_pp[part](fenc, stride, recon, stride);
> @@ -702,11 +715,11 @@
> uint32_t singleCbfY = 0;
> uint32_t singleCbfU = 0;
> uint32_t singleCbfV = 0;
> - bool checkTransformSkip =
> cu->getSlice()->getPPS()->getUseTransformSkip();
> + bool checkTransformSkip =
> cu->getSlice()->getPPS()->getUseTransformSkip();
> uint32_t widthTransformSkip = cu->getWidth(0) >> trDepth;
> uint32_t heightTransformSkip = cu->getHeight(0) >> trDepth;
> - int bestModeId = 0;
> - int bestModeIdUV[2] = { 0, 0 };
> + int bestModeId = 0;
> + int bestModeIdUV[2] = { 0, 0 };
>
> checkTransformSkip &= (widthTransformSkip == 4 && heightTransformSkip
> == 4);
> checkTransformSkip &= (!cu->getCUTransquantBypass(0));
> @@ -729,8 +742,8 @@
> uint32_t singleCbfUTmp = 0;
> uint32_t singleCbfVTmp = 0;
> uint64_t singleCostTmp = 0;
> - int default0Save1Load2 = 0;
> - int firstCheckId = 0;
> + int default0Save1Load2 = 0;
> + int firstCheckId = 0;
>
> uint32_t qpdiv = cu->getPic()->getNumPartInCU() >>
> ((cu->getDepth(0) + (trDepth - 1)) << 1);
> bool bFirstQ = ((absPartIdx % qpdiv) == 0);
> @@ -964,17 +977,17 @@
>
> if (!bLumaOnly)
> {
> - width >>= 1;
> - height >>= 1;
> + width >>= m_hChromaShift;
> + height >>= m_vChromaShift;
> src = m_qtTempTComYuv[qtLayer].getCbAddr(absPartIdx);
> - assert(m_qtTempTComYuv[qtLayer].m_cwidth == MAX_CU_SIZE / 2);
> + uint32_t srcstride = m_qtTempTComYuv[qtLayer].m_cwidth;
> dst =
> cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
> dststride = cu->getPic()->getPicYuvRec()->getCStride();
> - primitives.blockcpy_ps(width, height, dst, dststride, src,
> MAX_CU_SIZE / 2);
> + primitives.blockcpy_ps(width, height, dst, dststride, src,
> srcstride);
>
> src = m_qtTempTComYuv[qtLayer].getCrAddr(absPartIdx);
> dst = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(),
> zorder);
> - primitives.blockcpy_ps(width, height, dst, dststride, src,
> MAX_CU_SIZE / 2);
> + primitives.blockcpy_ps(width, height, dst, dststride, src,
> srcstride);
> }
> }
>
> @@ -1049,7 +1062,7 @@
> int lastPos = -1;
> cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
>
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
> m_trQuant->selectLambda(TEXT_LUMA);
> absSum = m_trQuant->transformNxN(cu, residual, stride, coeff,
> width, height, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
>
> @@ -1081,7 +1094,6 @@
> if (bCheckSplit && !bCheckFull)
> {
> //----- code splitted block -----
> -
> uint32_t qPartsDiv = cu->getPic()->getNumPartInCU() >>
> ((fullDepth + 1) << 1);
> uint32_t absPartIdxSub = absPartIdx;
> uint32_t splitCbfY = 0;
> @@ -1267,12 +1279,12 @@
> reconIPred =
> cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zOrder);
> reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
> reconQt = m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx);
> - assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
> - primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, MAX_CU_SIZE / 2);
> + uint32_t reconQtStride = m_qtTempTComYuv[qtlayer].m_cwidth;
> + primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, reconQtStride);
>
> reconIPred =
> cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zOrder);
> reconQt = m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx);
> - primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, MAX_CU_SIZE / 2);
> + primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, reconQtStride);
> }
> }
>
> @@ -1376,20 +1388,20 @@
> uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> uint32_t width = cu->getWidth(0) >> (trDepth + 1);
> uint32_t height = cu->getHeight(0) >> (trDepth + 1);
> - assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
> + uint32_t reconQtStride = m_qtTempTComYuv[qtlayer].m_cwidth;
> uint32_t reconIPredStride =
> cu->getPic()->getPicYuvRec()->getCStride();
>
> if (stateU0V1Both2 == 0 || stateU0V1Both2 == 2)
> {
> Pel* reconIPred =
> cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
> int16_t* reconQt =
> m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx);
> - primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, MAX_CU_SIZE / 2);
> + primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, reconQtStride);
> }
> if (stateU0V1Both2 == 1 || stateU0V1Both2 == 2)
> {
> Pel* reconIPred =
> cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
> int16_t* reconQt =
> m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx);
> - primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, MAX_CU_SIZE / 2);
> + primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, reconQtStride);
> }
> }
> }
> @@ -1411,7 +1423,7 @@
> uint32_t trSizeLog2 =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> fullDepth] + 2;
>
> uint32_t actualTrDepth = trDepth;
> - if (trSizeLog2 == 2)
> + if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
> {
> assert(trDepth > 0);
> actualTrDepth--;
> @@ -1557,7 +1569,7 @@
> uint32_t qtlayer =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>
> bool bChromaSame = false;
> - if (trSizeLog2 == 2)
> + if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
> {
> assert(trDepth > 0);
> uint32_t qpdiv = cu->getPic()->getNumPartInCU() >>
> ((cu->getDepth(0) + trDepth - 1) << 1);
> @@ -1572,9 +1584,11 @@
> uint32_t numCoeffC = (cu->getSlice()->getSPS()->getMaxCUWidth() *
> cu->getSlice()->getSPS()->getMaxCUHeight()) >> (fullDepth << 1);
> if (!bChromaSame)
> {
> - numCoeffC >>= 2;
> + numCoeffC = ((cu->getSlice()->getSPS()->getMaxCUWidth() >>
> m_hChromaShift) * (cu->getSlice()->getSPS()->getMaxCUHeight() >>
> m_vChromaShift)) >> (fullDepth << 1);
> }
> - uint32_t numCoeffIncC =
> (cu->getSlice()->getSPS()->getMaxCUWidth() *
> cu->getSlice()->getSPS()->getMaxCUHeight()) >>
> ((cu->getSlice()->getSPS()->getMaxCUDepth() << 1) + 2);
> +
> + uint32_t numCoeffIncC =
> ((cu->getSlice()->getSPS()->getMaxCUWidth() >> m_hChromaShift) *
> (cu->getSlice()->getSPS()->getMaxCUHeight() >> m_vChromaShift)) >>
> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
> +
> TCoeff* coeffSrcU = m_qtTempCoeffCb[qtlayer] + (numCoeffIncC *
> absPartIdx);
> TCoeff* coeffSrcV = m_qtTempCoeffCr[qtlayer] + (numCoeffIncC *
> absPartIdx);
> TCoeff* coeffDstU = cu->getCoeffCb() + (numCoeffIncC *
> absPartIdx);
> @@ -1583,7 +1597,7 @@
> ::memcpy(coeffDstV, coeffSrcV, sizeof(TCoeff) * numCoeffC);
>
> //===== copy reconstruction =====
> - uint32_t trSizeCLog2 = (bChromaSame ? trSizeLog2 : trSizeLog2 -
> 1);
> + uint32_t trSizeCLog2 = (bChromaSame || (cu->getChromaFormat() ==
> CHROMA_444)) ? trSizeLog2 : trSizeLog2 - 1;
> m_qtTempTComYuv[qtlayer].copyPartToPartChroma(reconYuv,
> absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2);
> }
> else
> @@ -1650,11 +1664,11 @@
> }
> //===== init availability pattern =====
> cu->getPattern()->initPattern(cu, trDepth, absPartIdx);
> - cu->getPattern()->initAdiPatternChroma(cu, absPartIdx,
> trDepth, m_predBuf, m_predBufStride, m_predBufHeight);
> + cu->getPattern()->initAdiPatternChroma(cu, absPartIdx,
> trDepth, m_predBuf, m_predBufStride, m_predBufHeight, chromaId);
> Pel* chromaPred = (chromaId > 0 ?
> cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) :
> cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));
>
> //===== get prediction signal =====
> - predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width);
> + predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width, height, cu->getChromaFormat());
>
> //===== get residual signal =====
> assert(!((uint32_t)(size_t)fenc & (width - 1)));
> @@ -1676,7 +1690,7 @@
> {
> curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> }
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
> m_trQuant->selectLambda(TEXT_CHROMA);
>
> @@ -1741,7 +1755,8 @@
> //===== init pattern =====
> assert(width == height);
> cu->getPattern()->initPattern(cu, 0, 0);
> - cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf,
> m_predBufStride, m_predBufHeight);
> + cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf,
> m_predBufStride, m_predBufHeight, 0/*chromaId*/);
> + cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf,
> m_predBufStride, m_predBufHeight, 1/*chromaId*/);
> Pel* patChromaU = cu->getPattern()->getAdiCbBuf(width, height,
> m_predBuf);
> Pel* patChromaV = cu->getPattern()->getAdiCrBuf(width, height,
> m_predBuf);
>
> @@ -1754,8 +1769,8 @@
> for (uint32_t mode = minMode; mode < maxMode; mode++)
> {
> //--- get prediction ---
> - predIntraChromaAng(patChromaU, mode, predU, stride, width);
> - predIntraChromaAng(patChromaV, mode, predV, stride, width);
> + predIntraChromaAng(patChromaU, mode, predU, stride, width,
> height, cu->getChromaFormat());
> + predIntraChromaAng(patChromaV, mode, predV, stride, width,
> height, cu->getChromaFormat());
>
> //--- get SAD ---
> uint32_t sad = sa8d(fencU, stride, predU, stride) + sa8d(fencV,
> stride, predV, stride);
> @@ -2131,13 +2146,14 @@
>
> if (width > 32)
> {
> - scaleWidth = 32;
> - scaleStride = 32;
> + scaleWidth = 32;
> + scaleStride = 32;
> costMultiplier = 4;
> }
>
> cu->getPattern()->initPattern(cu, trDepth, absPartIdx);
> - cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth,
> m_predBuf, m_predBufStride, m_predBufHeight);
> + cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth,
> m_predBuf, m_predBufStride, m_predBufHeight, 0);
> + cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth,
> m_predBuf, m_predBufStride, m_predBufHeight, 1);
>
> cu->getAllowedChromaDir(0, modeList);
> //----- check chroma modes -----
> @@ -2156,7 +2172,7 @@
> Pel* chromaPred = (chromaId > 0 ?
> cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) :
> cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));
>
> //===== get prediction signal =====
> - predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width);
> + predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width, height, cu->getChromaFormat());
> int log2SizeMinus2 = g_convertToBit[scaleWidth];
> pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
> sad = costMultiplier * sa8d(fenc, scaleStride, pred,
> scaleStride);
> @@ -2174,6 +2190,27 @@
> cu->setChromIntraDirSubParts(bestMode, 0, depth);
> }
>
> +bool TEncSearch::isNextSection()
> +{
> + if (m_splitMode == DONT_SPLIT)
> + {
> + m_section++;
> + return false;
> + }
> + else
> + {
> + m_absPartIdxTURelCU += m_absPartIdxStep;
> +
> + m_section++;
> + return m_section< (1 << m_splitMode);
> + }
> +}
> +
> +bool TEncSearch::isLastSection()
> +{
> + return (m_section+1) >= (1<<m_splitMode);
> +}
> +
> void TEncSearch::estIntraPredChromaQT(TComDataCU* cu,
> TComYuv* fencYuv,
> TComYuv* predYuv,
> @@ -2181,60 +2218,109 @@
> TComYuv* reconYuv,
> uint32_t preCalcDistC)
> {
> - uint32_t depth = cu->getDepth(0);
> - uint32_t bestMode = 0;
> - uint32_t bestDist = 0;
> - uint64_t bestCost = MAX_INT64;
> -
> - //----- init mode list -----
> - uint32_t minMode = 0;
> - uint32_t maxMode = NUM_CHROMA_MODE;
> - uint32_t modeList[NUM_CHROMA_MODE];
> -
> - cu->getAllowedChromaDir(0, modeList);
> -
> - //----- check chroma modes -----
> - for (uint32_t mode = minMode; mode < maxMode; mode++)
> + uint32_t depth = cu->getDepth(0);
> + uint32_t initTrDepth = (cu->getPartitionSize(0) != SIZE_2Nx2N)
> && (cu->getChromaFormat() == CHROMA_444 ? 1 : 0);
> + m_splitMode = (initTrDepth == 0) ? DONT_SPLIT :
> QUAD_SPLIT;
> + m_absPartIdxStep = (cu->getPic()->getNumPartInCU() >>
> (depth << 1)) >> partIdxStepShift[m_splitMode];
> + m_partOffset = 0;
> + m_section = 0;
> + m_absPartIdxTURelCU = 0;
> +
> + do
> {
> - //----- restore context models -----
> - m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
> -
> - //----- chroma coding -----
> - uint32_t dist = 0;
> - cu->setChromIntraDirSubParts(modeList[mode], 0, depth);
> - xRecurIntraChromaCodingQT(cu, 0, 0, fencYuv, predYuv, resiYuv,
> dist);
> - if (cu->getSlice()->getPPS()->getUseTransformSkip())
> + uint32_t bestMode = 0;
> + uint32_t bestDist = 0;
> + uint64_t bestCost = MAX_INT64;
> +
> + //----- init mode list -----
> + uint32_t minMode = 0;
> + uint32_t maxMode = NUM_CHROMA_MODE;
> + uint32_t modeList[NUM_CHROMA_MODE];
> +
> + m_partOffset = m_absPartIdxTURelCU;
> +
> + cu->getAllowedChromaDir(m_partOffset, modeList);
> +
> + //----- check chroma modes -----
> + for (uint32_t mode = minMode; mode < maxMode; mode++)
> {
> + //----- restore context models -----
> m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
> +
> + //----- chroma coding -----
> + uint32_t dist = 0;
> +
> + cu->setChromIntraDirSubParts(modeList[mode], m_partOffset,
> depth + initTrDepth);
> +
> + xRecurIntraChromaCodingQT(cu, initTrDepth,
> m_absPartIdxTURelCU, fencYuv, predYuv, resiYuv, dist);
> +
> + if (cu->getSlice()->getPPS()->getUseTransformSkip())
> + {
> +
> m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
> + }
> +
> + uint32_t bits = xGetIntraBitsQT(cu, initTrDepth,
> m_absPartIdxTURelCU, false, true);
> + uint64_t cost = m_rdCost->calcRdCost(dist, bits);
> +
> + //----- compare -----
> + if (cost < bestCost)
> + {
> + bestCost = cost;
> + bestDist = dist;
> + bestMode = modeList[mode];
> + xSetIntraResultChromaQT(cu, initTrDepth,
> m_absPartIdxTURelCU, reconYuv);
> + ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U) +
> m_partOffset, m_absPartIdxStep * sizeof(UChar));
> + ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V) +
> m_partOffset, m_absPartIdxStep * sizeof(UChar));
> + ::memcpy(m_qtTempTransformSkipFlag[1],
> cu->getTransformSkip(TEXT_CHROMA_U) + m_partOffset, m_absPartIdxStep *
> sizeof(UChar));
> + ::memcpy(m_qtTempTransformSkipFlag[2],
> cu->getTransformSkip(TEXT_CHROMA_V) + m_partOffset, m_absPartIdxStep *
> sizeof(UChar));
> + }
> }
>
> - uint32_t bits = xGetIntraBitsQT(cu, 0, 0, false, true);
> - uint64_t cost = m_rdCost->calcRdCost(dist, bits);
> -
> - //----- compare -----
> - if (cost < bestCost)
> + if (!isLastSection())
> {
> - bestCost = cost;
> - bestDist = dist;
> - bestMode = modeList[mode];
> - uint32_t qpn = cu->getPic()->getNumPartInCU() >> (depth << 1);
> - xSetIntraResultChromaQT(cu, 0, 0, reconYuv);
> - ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U), qpn *
> sizeof(UChar));
> - ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V), qpn *
> sizeof(UChar));
> - ::memcpy(m_qtTempTransformSkipFlag[1],
> cu->getTransformSkip(TEXT_CHROMA_U), qpn * sizeof(UChar));
> - ::memcpy(m_qtTempTransformSkipFlag[2],
> cu->getTransformSkip(TEXT_CHROMA_V), qpn * sizeof(UChar));
> + uint32_t compWidth = (cu->getWidth(0) >> m_hChromaShift)
> >> initTrDepth;
> + uint32_t compHeight = (cu->getHeight(0) >> m_vChromaShift)
> >> initTrDepth;
> + uint32_t zorder = cu->getZorderIdxInCU() + m_partOffset;
> + Pel* dst =
> cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
> + uint32_t dststride =
> cu->getPic()->getPicYuvRec()->getCStride();
> + Pel* src = reconYuv->getCbAddr(m_partOffset);
> + uint32_t srcstride = reconYuv->getCStride();
> +
> + primitives.blockcpy_pp(compWidth, compHeight, dst, dststride,
> src, srcstride);
> +
> + dst =
> cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
> + src = reconYuv->getCrAddr(m_partOffset);
> + primitives.blockcpy_pp(compWidth, compHeight, dst, dststride,
> src, srcstride);
> + }
> +
> + //----- set data -----
> + ::memcpy(cu->getCbf(TEXT_CHROMA_U) + m_partOffset,
> m_qtTempCbf[1], m_absPartIdxStep * sizeof(UChar));
> + ::memcpy(cu->getCbf(TEXT_CHROMA_V) + m_partOffset,
> m_qtTempCbf[2], m_absPartIdxStep * sizeof(UChar));
> + ::memcpy(cu->getTransformSkip(TEXT_CHROMA_U) + m_partOffset,
> m_qtTempTransformSkipFlag[1], m_absPartIdxStep * sizeof(UChar));
> + ::memcpy(cu->getTransformSkip(TEXT_CHROMA_V) + m_partOffset,
> m_qtTempTransformSkipFlag[2], m_absPartIdxStep * sizeof(UChar));
> + cu->setChromIntraDirSubParts(bestMode, m_partOffset, depth +
> initTrDepth);
> + cu->m_totalDistortion += bestDist - preCalcDistC;
> +
> + } while(isNextSection());
> +
> + //----- restore context models -----
> + if (initTrDepth != 0)
> + { // set Cbf for all blocks
> + uint32_t uiCombCbfU = 0;
> + uint32_t uiCombCbfV = 0;
> + uint32_t uiPartIdx = 0;
> + for (uint32_t uiPart = 0; uiPart < 4; uiPart++, uiPartIdx +=
> m_absPartIdxStep)
> + {
> + uiCombCbfU |= cu->getCbf(uiPartIdx, TEXT_CHROMA_U, 1);
> + uiCombCbfV |= cu->getCbf(uiPartIdx, TEXT_CHROMA_V, 1);
> + }
> + for (uint32_t uiOffs = 0; uiOffs < 4 * m_absPartIdxStep; uiOffs++)
> + {
> + cu->getCbf( TEXT_CHROMA_U )[ uiOffs ] |= uiCombCbfU;
> + cu->getCbf( TEXT_CHROMA_V )[ uiOffs ] |= uiCombCbfV;
> }
> }
>
> - //----- set data -----
> - uint32_t qpn = cu->getPic()->getNumPartInCU() >> (depth << 1);
> - ::memcpy(cu->getCbf(TEXT_CHROMA_U), m_qtTempCbf[1], qpn *
> sizeof(UChar));
> - ::memcpy(cu->getCbf(TEXT_CHROMA_V), m_qtTempCbf[2], qpn *
> sizeof(UChar));
> - ::memcpy(cu->getTransformSkip(TEXT_CHROMA_U),
> m_qtTempTransformSkipFlag[1], qpn * sizeof(UChar));
> - ::memcpy(cu->getTransformSkip(TEXT_CHROMA_V),
> m_qtTempTransformSkipFlag[2], qpn * sizeof(UChar));
> - cu->setChromIntraDirSubParts(bestMode, 0, depth);
> - cu->m_totalDistortion += bestDist - preCalcDistC;
> -
> //----- restore context models -----
> m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
> }
> @@ -3085,10 +3171,11 @@
> outResiYuv->clear();
>
> predYuv->copyToPartYuv(outReconYuv, 0);
> -
> + //Luma
> int part = partitionFromSizes(width, height);
> distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(),
> fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
> - part = partitionFromSizes(width >> 1, height >> 1);
> + //Chroma
> + part = partitionFromSizes(width >> m_hChromaShift, height >>
> m_vChromaShift);
> distortion +=
> m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(),
> fencYuv->getCStride(), outReconYuv->getCbAddr(),
> outReconYuv->getCStride()));
> distortion +=
> m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(),
> fencYuv->getCStride(), outReconYuv->getCrAddr(),
> outReconYuv->getCStride()));
>
> @@ -3208,7 +3295,7 @@
> // update with clipped distortion and cost (qp estimation loop uses
> unclipped values)
> int part = partitionFromSizes(width, height);
> bdist = primitives.sse_pp[part](fencYuv->getLumaAddr(),
> fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
> - part = partitionFromSizes(width >> 1, height >> 1);
> + part = partitionFromSizes(width >> cu->getHorzChromaShift(), height
> >> cu->getVertChromaShift());
> bdist +=
> m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(),
> fencYuv->getCStride(), outReconYuv->getCbAddr(),
> outReconYuv->getCStride()));
> bdist +=
> m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(),
> fencYuv->getCStride(), outReconYuv->getCrAddr(),
> outReconYuv->getCStride()));
> bcost = m_rdCost->calcRdCost(bdist, bestBits);
> @@ -3311,7 +3398,7 @@
> cu->setTransformSkipSubParts(0, TEXT_CHROMA_V, absPartIdx,
> cu->getDepth(0) + trModeC);
> }
>
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
> m_trQuant->selectLambda(TEXT_LUMA);
>
> absSumY = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
> @@ -3322,7 +3409,7 @@
> if (bCodeChroma)
> {
> int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
> m_trQuant->selectLambda(TEXT_CHROMA);
>
> @@ -3330,7 +3417,7 @@
> trWidthC, trHeightC,
> TEXT_CHROMA_U, absPartIdx, &lastPosU, false, curuseRDOQ);
>
> curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
> absSumV = m_trQuant->transformNxN(cu,
> resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
> trWidthC, trHeightC,
> TEXT_CHROMA_V, absPartIdx, &lastPosV, false, curuseRDOQ);
>
> @@ -3342,7 +3429,7 @@
> {
> int16_t *curResiY = resiYuv->getLumaAddr(absTUPartIdx);
>
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>
> int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];
> assert(scalingListType < 6);
> @@ -3362,7 +3449,7 @@
> int16_t *pcResiCurrU = resiYuv->getCbAddr(absTUPartIdxC);
>
> int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
> int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];
> assert(scalingListType < 6);
> @@ -3378,7 +3465,7 @@
> {
> int16_t *curResiV = resiYuv->getCrAddr(absTUPartIdxC);
> int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
> int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];
> assert(scalingListType < 6);
> @@ -3453,6 +3540,7 @@
> assert(cu->getDepth(0) == cu->getDepth(absPartIdx));
> const uint32_t trMode = depth - cu->getDepth(0);
> const uint32_t trSizeLog2 =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
> + uint32_t trSizeCLog2 =
> g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >>
> m_hChromaShift) >> depth] + 2;;
>
> bool bSplitFlag =
> ((cu->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) &&
> cu->getPredictionMode(absPartIdx) == MODE_INTER &&
> (cu->getPartitionSize(absPartIdx) != SIZE_2Nx2N));
> bool bCheckFull;
> @@ -3465,12 +3553,11 @@
>
> bool bCodeChroma = true;
> uint32_t trModeC = trMode;
> - uint32_t trSizeCLog2 = trSizeLog2 - 1;
> - if (trSizeLog2 == 2)
> + if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
> {
> trSizeCLog2++;
> trModeC--;
> - uint32_t qpdiv = cu->getPic()->getNumPartInCU() >>
> ((cu->getDepth(0) + trModeC) << 1);
> + uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1)
> << 1);
> bCodeChroma = ((absPartIdx % qpdiv) == 0);
> }
>
> @@ -3490,8 +3577,8 @@
> const uint32_t numCoeffPerAbsPartIdxIncrement =
> cu->getSlice()->getSPS()->getMaxCUWidth() *
> cu->getSlice()->getSPS()->getMaxCUHeight() >>
> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
> const uint32_t qtlayer =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> TCoeff *coeffCurY = m_qtTempCoeffY[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx);
> - TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> - TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> + TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
> + TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
>
> int trWidth = 0, trHeight = 0, trWidthC = 0, trHeightC = 0;
> uint32_t absTUPartIdxC = absPartIdx;
> @@ -3520,7 +3607,7 @@
> m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac,
> trWidth, trHeight, TEXT_LUMA);
> }
>
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
> m_trQuant->selectLambda(TEXT_LUMA);
>
> absSumY = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
> @@ -3534,17 +3621,17 @@
> {
> m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac,
> trWidthC, trHeightC, TEXT_CHROMA);
> }
> -
> + //Cb transform
> int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
> m_trQuant->selectLambda(TEXT_CHROMA);
>
> absSumU = m_trQuant->transformNxN(cu,
> resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU,
> trWidthC, trHeightC,
> TEXT_CHROMA_U, absPartIdx, &lastPosU, false, curuseRDOQ);
> -
> + //Cr transform
> curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
> absSumV = m_trQuant->transformNxN(cu,
> resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
> trWidthC, trHeightC,
> TEXT_CHROMA_V, absPartIdx, &lastPosV, false, curuseRDOQ);
>
> @@ -3586,7 +3673,7 @@
> {
> int16_t *curResiY =
> m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx);
>
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>
> int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];
> assert(scalingListType < 6);
> @@ -3658,16 +3745,15 @@
> int16_t *pcResiCurrU =
> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);
>
> int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
> int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];
> assert(scalingListType < 6);
> - assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> -
> m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> pcResiCurrU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC,
> scalingListType, false, lastPosU);
> +
> m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> pcResiCurrU, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurU, trWidthC,
> trHeightC, scalingListType, false, lastPosU);
>
> uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absTUPartIdxC),
> resiYuv->m_cwidth,
>
> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC),
> - MAX_CU_SIZE
> / 2);
> +
> m_qtTempTComYuv[qtlayer].m_cwidth);
> const uint32_t nonZeroDistU =
> m_rdCost->scaleChromaDistCb(dist);
>
> if (cu->isLosslessCoded(0))
> @@ -3710,10 +3796,10 @@
> if (!absSumU)
> {
> int16_t *ptr =
> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);
> - assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> + const uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;
>
> assert(trWidthC == trHeightC);
> -
> primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, MAX_CU_SIZE /
> 2, 0);
> +
> primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, stride, 0);
> }
>
> distV =
> m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(absTUPartIdxC),
> resiYuv->m_cwidth, m_tempPel, trWidthC));
> @@ -3725,16 +3811,15 @@
> {
> int16_t *curResiV =
> m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);
> int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
> int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];
> assert(scalingListType < 6);
> - assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> -
> m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiV, MAX_CU_SIZE / 2, coeffCurV, trWidthC, trHeightC, scalingListType,
> false, lastPosV);
> +
> m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiV, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurV, trWidthC,
> trHeightC, scalingListType, false, lastPosV);
>
> uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absTUPartIdxC),
> resiYuv->m_cwidth,
>
> m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC),
> - MAX_CU_SIZE
> / 2);
> +
> m_qtTempTComYuv[qtlayer].m_cwidth);
> const uint32_t nonZeroDistV =
> m_rdCost->scaleChromaDistCr(dist);
>
> if (cu->isLosslessCoded(0))
> @@ -3777,10 +3862,10 @@
> if (!absSumV)
> {
> int16_t *ptr =
> m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);
> - assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> + const uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;
>
> assert(trWidthC == trHeightC);
> -
> primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, MAX_CU_SIZE /
> 2, 0);
> +
> primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, stride, 0);
> }
> }
> cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx,
> depth);
> @@ -3817,7 +3902,7 @@
> m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac,
> trWidth, trHeight, TEXT_LUMA);
> }
>
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>
> m_trQuant->selectLambda(TEXT_LUMA);
> absSumTransformSkipY = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
> @@ -3831,7 +3916,7 @@
> m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx,
> trWidth, trHeight, depth, TEXT_LUMA);
> const uint32_t skipSingleBitsY =
> m_entropyCoder->getNumberOfWrittenBits();
>
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>
> int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];
> assert(scalingListType < 6);
> @@ -3874,7 +3959,7 @@
>
> int16_t *curResiU =
> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);
> int16_t *curResiV =
> m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);
> - assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
> + uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;
>
> TCoeff bestCoeffU[32 * 32], bestCoeffV[32 * 32];
> memcpy(bestCoeffU, coeffCurU, sizeof(TCoeff) *
> numSamplesChroma);
> @@ -3883,8 +3968,8 @@
> int16_t bestResiU[32 * 32], bestResiV[32 * 32];
> for (int i = 0; i < trHeightC; ++i)
> {
> - memcpy(&bestResiU[i * trWidthC], curResiU + i *
> (MAX_CU_SIZE / 2), sizeof(int16_t) * trWidthC);
> - memcpy(&bestResiV[i * trWidthC], curResiV + i *
> (MAX_CU_SIZE / 2), sizeof(int16_t) * trWidthC);
> + memcpy(&bestResiU[i * trWidthC], curResiU + i * stride,
> sizeof(int16_t) * trWidthC);
> + memcpy(&bestResiV[i * trWidthC], curResiV + i * stride,
> sizeof(int16_t) * trWidthC);
> }
>
>
> m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);
> @@ -3898,13 +3983,13 @@
> }
>
> int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
> m_trQuant->selectLambda(TEXT_CHROMA);
>
> absSumTransformSkipU = m_trQuant->transformNxN(cu,
> resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU,
> trWidthC,
> trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosTransformSkipU, true,
> curuseRDOQ);
> curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
> absSumTransformSkipV = m_trQuant->transformNxN(cu,
> resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
> trWidthC,
> trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosTransformSkipV, true,
> curuseRDOQ);
>
> @@ -3922,17 +4007,15 @@
> singleBitsU = m_entropyCoder->getNumberOfWrittenBits();
>
> curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
> int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];
> assert(scalingListType < 6);
> - assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> -
> -
> m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC, scalingListType,
> true, lastPosTransformSkipU);
> +
> m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiU, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurU, trWidthC,
> trHeightC, scalingListType, true, lastPosTransformSkipU);
>
> uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absTUPartIdxC),
> resiYuv->m_cwidth,
>
> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC),
> - MAX_CU_SIZE
> / 2);
> +
> m_qtTempTComYuv[qtlayer].m_cwidth);
> nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
> singleCostU = m_rdCost->calcRdCost(nonZeroDistU,
> singleBitsU);
> }
> @@ -3944,7 +4027,7 @@
> memcpy(coeffCurU, bestCoeffU, sizeof(TCoeff) *
> numSamplesChroma);
> for (int i = 0; i < trHeightC; ++i)
> {
> - memcpy(curResiU + i * (MAX_CU_SIZE / 2), &bestResiU[i
> * trWidthC], sizeof(int16_t) * trWidthC);
> + memcpy(curResiU + i * stride, &bestResiU[i *
> trWidthC], sizeof(int16_t) * trWidthC);
> }
> }
> else
> @@ -3961,17 +4044,15 @@
> singleBitsV = m_entropyCoder->getNumberOfWrittenBits() -
> singleBitsU;
>
> curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> - m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> + m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
> int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];
> assert(scalingListType < 6);
> - assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> -
> -
> m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiV, MAX_CU_SIZE / 2, coeffCurV, trWidthC, trHeightC, scalingListType,
> true, lastPosTransformSkipV);
> +
> m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiV, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurV, trWidthC,
> trHeightC, scalingListType, true, lastPosTransformSkipV);
>
> uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absTUPartIdxC),
> resiYuv->m_cwidth,
>
> m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC),
> - MAX_CU_SIZE
> / 2);
> +
> m_qtTempTComYuv[qtlayer].m_cwidth);
> nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
> singleCostV = m_rdCost->calcRdCost(nonZeroDistV,
> singleBitsV);
> }
> @@ -3983,7 +4064,7 @@
> memcpy(coeffCurV, bestCoeffV, sizeof(TCoeff) *
> numSamplesChroma);
> for (int i = 0; i < trHeightC; ++i)
> {
> - memcpy(curResiV + i * (MAX_CU_SIZE / 2), &bestResiV[i
> * trWidthC], sizeof(int16_t) * trWidthC);
> + memcpy(curResiV + i * stride, &bestResiV[i *
> trWidthC], sizeof(int16_t) * trWidthC);
> }
> }
> else
> @@ -4115,6 +4196,7 @@
> const uint32_t trMode = cu->getTransformIdx(absPartIdx);
> const bool bSubdiv = curTrMode != trMode;
> const uint32_t trSizeLog2 =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
> + uint32_t trSizeCLog2 =
> g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >>
> m_hChromaShift) >> depth] + 2;
>
> if (bSubdivAndCbf && trSizeLog2 <=
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() && trSizeLog2 >
> cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
> {
> @@ -4145,21 +4227,20 @@
>
> if (!bSubdiv)
> {
> + //Luma
> const uint32_t numCoeffPerAbsPartIdxIncrement =
> cu->getSlice()->getSPS()->getMaxCUWidth() *
> cu->getSlice()->getSPS()->getMaxCUHeight() >>
> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
> - //assert( 16 == uiNumCoeffPerAbsPartIdxIncrement ); // check
> const uint32_t qtlayer =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> TCoeff *coeffCurY = m_qtTempCoeffY[qtlayer] +
> numCoeffPerAbsPartIdxIncrement * absPartIdx;
> - TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> - TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> +
> + //Chroma
> + TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
> + TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
>
> bool bCodeChroma = true;
> - uint32_t trModeC = trMode;
> - uint32_t trSizeCLog2 = trSizeLog2 - 1;
> - if (trSizeLog2 == 2)
> + if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
> {
> trSizeCLog2++;
> - trModeC--;
> - uint32_t qpdiv = cu->getPic()->getNumPartInCU() >>
> ((cu->getDepth(0) + trModeC) << 1);
> + uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth -
> 1) << 1);
> bCodeChroma = ((absPartIdx % qpdiv) == 0);
> }
>
> @@ -4171,21 +4252,18 @@
> {
> if (ttype == TEXT_LUMA && cu->getCbf(absPartIdx, TEXT_LUMA,
> trMode))
> {
> - int trWidth = 1 << trSizeLog2;
> - int trHeight = 1 << trSizeLog2;
> - m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx,
> trWidth, trHeight, depth, TEXT_LUMA);
> + m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx,
> 1 << trSizeLog2, 1 << trSizeLog2, depth, TEXT_LUMA);
> }
> +
> if (bCodeChroma)
> {
> - int trWidth = 1 << trSizeCLog2;
> - int trHeight = 1 << trSizeCLog2;
> if (ttype == TEXT_CHROMA_U && cu->getCbf(absPartIdx,
> TEXT_CHROMA_U, trMode))
> {
> - m_entropyCoder->encodeCoeffNxN(cu, coeffCurU,
> absPartIdx, trWidth, trHeight, depth, TEXT_CHROMA_U);
> + m_entropyCoder->encodeCoeffNxN(cu, coeffCurU,
> absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2, depth, TEXT_CHROMA_U);
> }
> if (ttype == TEXT_CHROMA_V && cu->getCbf(absPartIdx,
> TEXT_CHROMA_V, trMode))
> {
> - m_entropyCoder->encodeCoeffNxN(cu, coeffCurV,
> absPartIdx, trWidth, trHeight, depth, TEXT_CHROMA_V);
> + m_entropyCoder->encodeCoeffNxN(cu, coeffCurV,
> absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2, depth, TEXT_CHROMA_V);
> }
> }
> }
> @@ -4211,13 +4289,13 @@
>
> if (curTrMode == trMode)
> {
> - const uint32_t trSizeLog2 =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
> + const uint32_t trSizeLog2 =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
> + uint32_t trSizeCLog2 =
> g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >>
> cu->getHorzChromaShift()) >> depth] + 2;;
> const uint32_t qtlayer =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>
> bool bCodeChroma = true;
> uint32_t trModeC = trMode;
> - uint32_t trSizeCLog2 = trSizeLog2 - 1;
> - if (trSizeLog2 == 2)
> + if((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
> {
> trSizeCLog2++;
> trModeC--;
> @@ -4246,10 +4324,10 @@
> if (bCodeChroma)
> {
> uint32_t uiNumCoeffC = (1 << (trSizeCLog2 << 1));
> - TCoeff* pcCoeffSrcU = m_qtTempCoeffCb[qtlayer] +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> - TCoeff* pcCoeffSrcV = m_qtTempCoeffCr[qtlayer] +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> - TCoeff* pcCoeffDstU = cu->getCoeffCb() +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> - TCoeff* pcCoeffDstV = cu->getCoeffCr() +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> + TCoeff* pcCoeffSrcU = m_qtTempCoeffCb[qtlayer] +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
> + TCoeff* pcCoeffSrcV = m_qtTempCoeffCr[qtlayer] +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
> + TCoeff* pcCoeffDstU = cu->getCoeffCb() +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
> + TCoeff* pcCoeffDstV = cu->getCoeffCr() +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
> ::memcpy(pcCoeffDstU, pcCoeffSrcU, sizeof(TCoeff) *
> uiNumCoeffC);
> ::memcpy(pcCoeffDstV, pcCoeffSrcV, sizeof(TCoeff) *
> uiNumCoeffC);
> }
> diff -r 4811da38078c -r f7d21da102ac source/common/TShortYUV.h
> --- a/source/common/TShortYUV.h Mon Jan 06 23:15:58 2014 -0600
> +++ b/source/common/TShortYUV.h Tue Jan 07 16:44:39 2014 +0530
> @@ -87,9 +87,9 @@
> // Access starting position of YUV partition unit buffer
> int16_t* getLumaAddr(unsigned int partUnitIdx) { return m_bufY +
> getAddrOffset(partUnitIdx, m_width); }
>
> - int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb +
> (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }
> + int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb +
> (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }
>
> - int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr +
> (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }
> + int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr +
> (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }
>
> // Access starting position of YUV transform unit buffer
> int16_t* getLumaAddr(unsigned int partIdx, unsigned int size) {
> return m_bufY + getAddrOffset(partIdx, size, m_width); }
> diff -r 4811da38078c -r f7d21da102ac source/common/ipfilter.cpp
> --- a/source/common/ipfilter.cpp Mon Jan 06 23:15:58 2014 -0600
> +++ b/source/common/ipfilter.cpp Tue Jan 07 16:44:39 2014 +0530
> @@ -449,74 +449,108 @@
> namespace x265 {
> // x265 private namespace
>
> -#define CHROMA(W, H) \
> +#define CHROMA_420(W, H) \
> p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] =
> interp_horiz_pp_c<4, W, H>; \
> p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] =
> interp_horiz_ps_c<4, W, H>; \
> - p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] =
> interp_vert_pp_c<4, W, H>; \
> - p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] =
> interp_vert_ps_c<4, W, H>; \
> - p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] =
> interp_vert_sp_c<4, W, H>; \
> + p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] =
> interp_vert_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] =
> interp_vert_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] =
> interp_vert_sp_c<4, W, H>; \
> p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] =
> interp_vert_ss_c<4, W, H>;
>
> +#define CHROMA_444(W, H) \
> + p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] =
> interp_horiz_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] =
> interp_horiz_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] =
> interp_vert_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] =
> interp_vert_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] =
> interp_vert_sp_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] =
> interp_vert_ss_c<4, W, H>;
> +
> #define LUMA(W, H) \
> p.luma_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<8, W, H>; \
> p.luma_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<8, W, H>; \
> - p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \
> - p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \
> - p.luma_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<8, W, H>; \
> - p.luma_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<8, W, H>; \
> + p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \
> + p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \
> + p.luma_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<8, W, H>; \
> + p.luma_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<8, W, H>; \
> p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_hv_pp_c<8, W, H>;
>
> void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)
> {
> LUMA(4, 4);
> LUMA(8, 8);
> - CHROMA(4, 4);
> + CHROMA_420(4, 4);
> LUMA(4, 8);
> - CHROMA(2, 4);
> + CHROMA_420(2, 4);
> LUMA(8, 4);
> - CHROMA(4, 2);
> + CHROMA_420(4, 2);
> LUMA(16, 16);
> - CHROMA(8, 8);
> + CHROMA_420(8, 8);
> LUMA(16, 8);
> - CHROMA(8, 4);
> + CHROMA_420(8, 4);
> LUMA(8, 16);
> - CHROMA(4, 8);
> + CHROMA_420(4, 8);
> LUMA(16, 12);
> - CHROMA(8, 6);
> + CHROMA_420(8, 6);
> LUMA(12, 16);
> - CHROMA(6, 8);
> + CHROMA_420(6, 8);
> LUMA(16, 4);
> - CHROMA(8, 2);
> + CHROMA_420(8, 2);
> LUMA(4, 16);
> - CHROMA(2, 8);
> + CHROMA_420(2, 8);
> LUMA(32, 32);
> - CHROMA(16, 16);
> + CHROMA_420(16, 16);
> LUMA(32, 16);
> - CHROMA(16, 8);
> + CHROMA_420(16, 8);
> LUMA(16, 32);
> - CHROMA(8, 16);
> + CHROMA_420(8, 16);
> LUMA(32, 24);
> - CHROMA(16, 12);
> + CHROMA_420(16, 12);
> LUMA(24, 32);
> - CHROMA(12, 16);
> + CHROMA_420(12, 16);
> LUMA(32, 8);
> - CHROMA(16, 4);
> + CHROMA_420(16, 4);
> LUMA(8, 32);
> - CHROMA(4, 16);
> + CHROMA_420(4, 16);
> LUMA(64, 64);
> - CHROMA(32, 32);
> + CHROMA_420(32, 32);
> LUMA(64, 32);
> - CHROMA(32, 16);
> + CHROMA_420(32, 16);
> LUMA(32, 64);
> - CHROMA(16, 32);
> + CHROMA_420(16, 32);
> LUMA(64, 48);
> - CHROMA(32, 24);
> + CHROMA_420(32, 24);
> LUMA(48, 64);
> - CHROMA(24, 32);
> + CHROMA_420(24, 32);
> LUMA(64, 16);
> - CHROMA(32, 8);
> + CHROMA_420(32, 8);
> LUMA(16, 64);
> - CHROMA(8, 32);
> + CHROMA_420(8, 32);
> +
> + CHROMA_444(4, 4);
> + CHROMA_444(8, 8);
> + CHROMA_444(4, 8);
> + CHROMA_444(8, 4);
> + CHROMA_444(16, 16);
> + CHROMA_444(16, 8);
> + CHROMA_444(8, 16);
> + CHROMA_444(16, 12);
> + CHROMA_444(12, 16);
> + CHROMA_444(16, 4);
> + CHROMA_444(4, 16);
> + CHROMA_444(32, 32);
> + CHROMA_444(32, 16);
> + CHROMA_444(16, 32);
> + CHROMA_444(32, 24);
> + CHROMA_444(24, 32);
> + CHROMA_444(32, 8);
> + CHROMA_444(8, 32);
> + CHROMA_444(64, 64);
> + CHROMA_444(64, 32);
> + CHROMA_444(32, 64);
> + CHROMA_444(64, 48);
> + CHROMA_444(48, 64);
> + CHROMA_444(64, 16);
> + CHROMA_444(16, 64);
>
> p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_ps_c<8>;
> p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_ps_c<4>;
> @@ -525,7 +559,9 @@
>
> p.chroma_vsp = filterVertical_sp_c<4>;
> p.luma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;
> - p.chroma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
> +
> + p.chroma_p2s[X265_CSP_I444] = filterConvertPelToShort_c<MAX_CU_SIZE>;
> + p.chroma_p2s[X265_CSP_I420] = filterConvertPelToShort_c<MAX_CU_SIZE /
> 2>;
>
> p.extendRowBorder = extendCURowColBorder;
> }
> diff -r 4811da38078c -r f7d21da102ac source/common/pixel.cpp
> --- a/source/common/pixel.cpp Mon Jan 06 23:15:58 2014 -0600
> +++ b/source/common/pixel.cpp Tue Jan 07 16:44:39 2014 +0530
> @@ -805,6 +805,27 @@
> namespace x265 {
> // x265 private namespace
>
> +#define CHROMA_420(W, H) \
> + p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] =
> blockcopy_pp_c<W, H>; \
> + p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] =
> blockcopy_sp_c<W, H>; \
> + p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] =
> blockcopy_ps_c<W, H>; \
> + p.chroma[X265_CSP_I420].sub_ps [CHROMA_ ## W ## x ## H] =
> pixel_sub_ps_c<W, H>; \
> + p.chroma[X265_CSP_I420].add_ps [CHROMA_ ## W ## x ## H] =
> pixel_add_ps_c<W, H>;
> +
> +#define CHROMA_444(W, H) \
> + p.chroma[X265_CSP_I444].copy_pp[LUMA_ ## W ## x ## H] =
> blockcopy_pp_c<W, H>; \
> + p.chroma[X265_CSP_I444].copy_sp[LUMA_ ## W ## x ## H] =
> blockcopy_sp_c<W, H>; \
> + p.chroma[X265_CSP_I444].copy_ps[LUMA_ ## W ## x ## H] =
> blockcopy_ps_c<W, H>; \
> + p.chroma[X265_CSP_I444].sub_ps [LUMA_ ## W ## x ## H] =
> pixel_sub_ps_c<W, H>; \
> + p.chroma[X265_CSP_I444].add_ps [LUMA_ ## W ## x ## H] =
> pixel_add_ps_c<W, H>;
> +
> +#define LUMA(W, H) \
> + p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> + p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> + p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
> + p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
> + p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
> +
> /* It should initialize entries for pixel functions defined in this file.
> */
> void Setup_C_PixelPrimitives(EncoderPrimitives &p)
> {
> @@ -840,69 +861,81 @@
> p.satd[LUMA_64x16] = satd8<64, 16>;
> p.satd[LUMA_16x64] = satd8<16, 64>;
>
> -#define CHROMA(W, H) \
> - p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] =
> blockcopy_pp_c<W, H>; \
> - p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] =
> blockcopy_sp_c<W, H>; \
> - p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] =
> blockcopy_ps_c<W, H>; \
> - p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] =
> pixel_sub_ps_c<W, H>; \
> - p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] =
> pixel_add_ps_c<W, H>;
> -
> -#define LUMA(W, H) \
> - p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> - p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> - p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
> - p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
> - p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
> -
> LUMA(4, 4);
> LUMA(8, 8);
> - CHROMA(4, 4);
> + CHROMA_420(4, 4);
> LUMA(4, 8);
> - CHROMA(2, 4);
> + CHROMA_420(2, 4);
> LUMA(8, 4);
> - CHROMA(4, 2);
> + CHROMA_420(4, 2);
> LUMA(16, 16);
> - CHROMA(8, 8);
> + CHROMA_420(8, 8);
> LUMA(16, 8);
> - CHROMA(8, 4);
> + CHROMA_420(8, 4);
> LUMA(8, 16);
> - CHROMA(4, 8);
> + CHROMA_420(4, 8);
> LUMA(16, 12);
> - CHROMA(8, 6);
> + CHROMA_420(8, 6);
> LUMA(12, 16);
> - CHROMA(6, 8);
> + CHROMA_420(6, 8);
> LUMA(16, 4);
> - CHROMA(8, 2);
> + CHROMA_420(8, 2);
> LUMA(4, 16);
> - CHROMA(2, 8);
> + CHROMA_420(2, 8);
> LUMA(32, 32);
> - CHROMA(16, 16);
> + CHROMA_420(16, 16);
> LUMA(32, 16);
> - CHROMA(16, 8);
> + CHROMA_420(16, 8);
> LUMA(16, 32);
> - CHROMA(8, 16);
> + CHROMA_420(8, 16);
> LUMA(32, 24);
> - CHROMA(16, 12);
> + CHROMA_420(16, 12);
> LUMA(24, 32);
> - CHROMA(12, 16);
> + CHROMA_420(12, 16);
> LUMA(32, 8);
> - CHROMA(16, 4);
> + CHROMA_420(16, 4);
> LUMA(8, 32);
> - CHROMA(4, 16);
> + CHROMA_420(4, 16);
> LUMA(64, 64);
> - CHROMA(32, 32);
> + CHROMA_420(32, 32);
> LUMA(64, 32);
> - CHROMA(32, 16);
> + CHROMA_420(32, 16);
> LUMA(32, 64);
> - CHROMA(16, 32);
> + CHROMA_420(16, 32);
> LUMA(64, 48);
> - CHROMA(32, 24);
> + CHROMA_420(32, 24);
> LUMA(48, 64);
> - CHROMA(24, 32);
> + CHROMA_420(24, 32);
> LUMA(64, 16);
> - CHROMA(32, 8);
> + CHROMA_420(32, 8);
> LUMA(16, 64);
> - CHROMA(8, 32);
> + CHROMA_420(8, 32);
> +
> + CHROMA_444(4, 4);
> + CHROMA_444(8, 8);
> + CHROMA_444(4, 8);
> + CHROMA_444(8, 4);
> + CHROMA_444(16, 16);
> + CHROMA_444(16, 8);
> + CHROMA_444(8, 16);
> + CHROMA_444(16, 12);
> + CHROMA_444(12, 16);
> + CHROMA_444(16, 4);
> + CHROMA_444(4, 16);
> + CHROMA_444(32, 32);
> + CHROMA_444(32, 16);
> + CHROMA_444(16, 32);
> + CHROMA_444(32, 24);
> + CHROMA_444(24, 32);
> + CHROMA_444(32, 8);
> + CHROMA_444(8, 32);
> + CHROMA_444(64, 64);
> + CHROMA_444(64, 32);
> + CHROMA_444(32, 64);
> + CHROMA_444(64, 48);
> + CHROMA_444(48, 64);
> + CHROMA_444(64, 16);
> + CHROMA_444(16, 64);
>
> SET_FUNC_PRIMITIVE_TABLE_C(sse_pp, sse, pixelcmp_t, pixel, pixel)
> SET_FUNC_PRIMITIVE_TABLE_C(sse_sp, sse, pixelcmp_sp_t, int16_t, pixel)
> diff -r 4811da38078c -r f7d21da102ac source/common/primitives.h
> --- a/source/common/primitives.h Mon Jan 06 23:15:58 2014 -0600
> +++ b/source/common/primitives.h Tue Jan 07 16:44:39 2014 +0530
> @@ -75,7 +75,7 @@
> // 4:2:0 chroma partition sizes. These enums are just a convenience for
> indexing into the
> // chroma primitive arrays when instantiating templates. The function
> tables should always
> // be indexed by the luma partition enum
> -enum Chroma420Partions
> +enum Chroma420Partitions
> {
> CHROMA_2x2, // never used by HEVC
> CHROMA_4x4, CHROMA_4x2, CHROMA_2x4,
> @@ -240,7 +240,7 @@
> ipfilter_ps_t ipfilter_ps[NUM_IPFILTER_P_S];
> ipfilter_ss_t ipfilter_ss[NUM_IPFILTER_S_S];
> filter_p2s_t luma_p2s;
> - filter_p2s_t chroma_p2s;
> + filter_p2s_t chroma_p2s[NUM_CHROMA_PARTITIONS];
> ipfilter_sp_t chroma_vsp;
>
> weightp_sp_t weight_sp;
> diff -r 4811da38078c -r f7d21da102ac source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Mon Jan 06 23:15:58 2014 -0600
> +++ b/source/encoder/encoder.cpp Tue Jan 07 16:44:39 2014 +0530
> @@ -1288,6 +1288,8 @@
> bEnableRDOQTS = 0;
> }
>
> + m_csp = _param->internalCsp;
> +
> //====== Coding Tools ========
>
> uint32_t tuQTMaxLog2Size = g_convertToBit[_param->maxCUSize] + 2 - 1;
> diff -r 4811da38078c -r f7d21da102ac source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Mon Jan 06 23:15:58 2014 -0600
> +++ b/source/encoder/frameencoder.cpp Tue Jan 07 16:44:39 2014 +0530
> @@ -330,11 +330,11 @@
> // instead we weight the distortion of chroma.
> int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() +
> slice->getSliceQpDeltaCb();
> int qpc = Clip3(0, 70, qp + chromaQPOffset);
> - double cbWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into
> account of the chroma qp mapping and chroma qp Offset
> + double cbWeight = pow(2.0, (qp -
> g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // takes into
> account of the chroma qp mapping and chroma qp Offset
>
> chromaQPOffset = slice->getPPS()->getChromaCrQpOffset() +
> slice->getSliceQpDeltaCr();
> qpc = Clip3(0, 70, qp + chromaQPOffset);
> - double crWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into
> account of the chroma qp mapping and chroma qp Offset
> + double crWeight = pow(2.0, (qp -
> g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // takes into
> account of the chroma qp mapping and chroma qp Offset
> double chromaLambda = lambda / crWeight;
>
> m_rows[row].m_search.setQPLambda(qp, lambda, chromaLambda);
> @@ -369,10 +369,10 @@
> int qpc;
> int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() +
> slice->getSliceQpDeltaCb();
> qpc = Clip3(0, 70, qp + chromaQPOffset);
> - double cbWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into account of the chroma qp mapping and chroma qp Offset
> + double cbWeight = pow(2.0, (qp - g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // takes into account of the chroma qp mapping and chroma qp Offset
> chromaQPOffset = slice->getPPS()->getChromaCrQpOffset() +
> slice->getSliceQpDeltaCr();
> qpc = Clip3(0, 70, qp + chromaQPOffset);
> - double crWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into account of the chroma qp mapping and chroma qp Offset
> + double crWeight = pow(2.0, (qp - g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // takes into account of the chroma qp mapping and chroma qp Offset
> double chromaLambda = lambda / crWeight;
>
> // NOTE: set SAO lambda every Frame
> diff -r 4811da38078c -r f7d21da102ac source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp Mon Jan 06 23:15:58 2014 -0600
> +++ b/source/encoder/framefilter.cpp Tue Jan 07 16:44:39 2014 +0530
> @@ -64,6 +64,9 @@
> m_cfg = top;
> m_numRows = numRows;
>
> + m_hChromaShift = CHROMA_H_SHIFT(m_cfg->getColorFormat());
> + m_vChromaShift = CHROMA_V_SHIFT(m_cfg->getColorFormat());
> +
> // NOTE: for sao only, I write this code because I want to exact match with HM's bug bitstream
> m_rdGoOnSbacCoderRow0 = rdGoOnSbacCoder;
>
> @@ -77,7 +80,7 @@
> m_sao.setSaoLcuBoundary(top->param.saoLcuBoundary);
>
> m_sao.setSaoLcuBasedOptimization(top->param.saoLcuBasedOptimization);
> m_sao.setMaxNumOffsetsPerPic(top->getMaxNumOffsetsPerPic());
> - m_sao.create(top->param.sourceWidth, top->param.sourceHeight,
> g_maxCUWidth, g_maxCUHeight);
> + m_sao.create(top->param.sourceWidth, top->param.sourceHeight,
> g_maxCUWidth, g_maxCUHeight, m_cfg->getColorFormat());
> m_sao.createEncBuffer();
> }
>
> @@ -222,8 +225,8 @@
>
> // Border extend Left and Right
> primitives.extendRowBorder(recon->getLumaAddr(lineStartCUAddr),
> recon->getStride(), recon->getWidth(), realH, recon->getLumaMarginX());
> - primitives.extendRowBorder(recon->getCbAddr(lineStartCUAddr),
> recon->getCStride(), recon->getWidth() >> 1, realH >> 1,
> recon->getChromaMarginX());
> - primitives.extendRowBorder(recon->getCrAddr(lineStartCUAddr),
> recon->getCStride(), recon->getWidth() >> 1, realH >> 1,
> recon->getChromaMarginX());
> + primitives.extendRowBorder(recon->getCbAddr(lineStartCUAddr),
> recon->getCStride(), recon->getWidth() >> m_hChromaShift, realH >>
> m_vChromaShift, recon->getChromaMarginX());
> + primitives.extendRowBorder(recon->getCrAddr(lineStartCUAddr),
> recon->getCStride(), recon->getWidth() >> m_hChromaShift, realH >>
> m_vChromaShift, recon->getChromaMarginX());
>
> // Border extend Top
> if (row == 0)
> @@ -252,8 +255,8 @@
> const intptr_t stride = recon->getStride();
> const intptr_t strideC = recon->getCStride();
> pixel *pixY = recon->getLumaAddr(lineStartCUAddr) -
> recon->getLumaMarginX() + (realH - 1) * stride;
> - pixel *pixU = recon->getCbAddr(lineStartCUAddr) -
> recon->getChromaMarginX() + ((realH >> 1) - 1) * strideC;
> - pixel *pixV = recon->getCrAddr(lineStartCUAddr) -
> recon->getChromaMarginX() + ((realH >> 1) - 1) * strideC;
> + pixel *pixU = recon->getCbAddr(lineStartCUAddr) -
> recon->getChromaMarginX() + ((realH >> m_vChromaShift) - 1) * strideC;
> + pixel *pixV = recon->getCrAddr(lineStartCUAddr) -
> recon->getChromaMarginX() + ((realH >> m_vChromaShift) - 1) * strideC;
>
> for (int y = 0; y < recon->getLumaMarginY(); y++)
> {
> @@ -290,8 +293,8 @@
>
> uint64_t ssdY = computeSSD(orig->getLumaAddr(cuAddr),
> recon->getLumaAddr(cuAddr), stride, width, height);
>
> - height >>= 1;
> - width >>= 1;
> + height >>= m_vChromaShift;
> + width >>= m_hChromaShift;
> stride = recon->getCStride();
>
> uint64_t ssdU = computeSSD(orig->getCbAddr(cuAddr),
> recon->getCbAddr(cuAddr), stride, width, height);
> @@ -337,8 +340,8 @@
>
> updateMD5Plane(m_pic->m_state[0], recon->getLumaAddr(cuAddr),
> width, height, stride);
>
> - width >>= 1;
> - height >>= 1;
> + width >>= m_hChromaShift;
> + height >>= m_vChromaShift;
> stride = recon->getCStride();
>
> updateMD5Plane(m_pic->m_state[1], recon->getCbAddr(cuAddr),
> width, height, stride);
> @@ -356,8 +359,8 @@
> }
> updateCRC(recon->getLumaAddr(cuAddr), m_pic->m_crc[0], height,
> width, stride);
>
> - width >>= 1;
> - height >>= 1;
> + width >>= m_hChromaShift;
> + height >>= m_vChromaShift;
> stride = recon->getCStride();
>
> updateCRC(recon->getCbAddr(cuAddr), m_pic->m_crc[1], height,
> width, stride);
> @@ -374,10 +377,10 @@
> m_pic->m_checksum[0] = m_pic->m_checksum[1] =
> m_pic->m_checksum[2] = 0;
> }
> updateChecksum(recon->getLumaAddr(), m_pic->m_checksum[0],
> height, width, stride, row, cuHeight);
> - width >>= 1;
> - height >>= 1;
> + width >>= m_hChromaShift;
> + height >>= m_vChromaShift;
> stride = recon->getCStride();
> - cuHeight >>= 1;
> + cuHeight >>= m_vChromaShift;
> updateChecksum(recon->getCbAddr(), m_pic->m_checksum[1], height,
> width, stride, row, cuHeight);
> updateChecksum(recon->getCrAddr(), m_pic->m_checksum[2], height,
> width, stride, row, cuHeight);
> }
> diff -r 4811da38078c -r f7d21da102ac source/encoder/framefilter.h
> --- a/source/encoder/framefilter.h Mon Jan 06 23:15:58 2014 -0600
> +++ b/source/encoder/framefilter.h Tue Jan 07 16:44:39 2014 +0530
> @@ -59,6 +59,9 @@
> TEncCfg* m_cfg;
> TComPic* m_pic;
>
> + int m_hChromaShift;
> + int m_vChromaShift;
> +
> public:
>
> TComLoopFilter m_loopFilter;
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140107/3cba5caa/attachment-0001.html>
More information about the x265-devel mailing list