<div style="line-height:1.7;color:#000000;font-size:14px;font-family:arial">在 2013-11-13 07:02:33，"Steve Borho" &lt;steve@borho.org&gt; 写道：<BR>
<BLOCKQUOTE id="isReplyContent" style="PADDING-LEFT: 1ex; MARGIN: 0px 0px 0px 0.8ex; BORDER-LEFT: #ccc 1px solid">
<DIV dir="ltr"><BR>
<DIV class="gmail_extra"><BR><BR>
<DIV class="gmail_quote">On Tue, Nov 12, 2013 at 2:17 AM, Min Chen <SPAN dir="ltr">&lt;<A href="mailto:chenm003@163.com" target="_blank">chenm003@163.com</A>&gt;</SPAN> wrote:<BR>
<BLOCKQUOTE class="gmail_quote" style="PADDING-LEFT: 1ex; MARGIN: 0px 0px 0px 0.8ex; BORDER-LEFT: #ccc 1px solid"># HG changeset patch<BR># User Min Chen &lt;<A href="mailto:chenm003@163.com">chenm003@163.com</A>&gt;<BR># Date 1384244049 -28800<BR># Node ID 8c731f8c71ff6f42718a80934433a154417caeec<BR># Parent 1ca01c82609fbb173a665c31adf07c429806d4f1<BR>cleanup: hardcoded m_qtTempTComYuv[qtLayer].m_width to MAX_CU_SIZE<BR></BLOCKQUOTE>
<DIV><BR></DIV>
<DIV>neither of these apply cleanly</DIV>
<DIV> </DIV>
<DIV>use fixed stride to reduce number of parameters </DIV>
<DIV> </DIV>
<BLOCKQUOTE class="gmail_quote" style="PADDING-LEFT: 1ex; MARGIN: 0px 0px 0px 0.8ex; BORDER-LEFT: #ccc 1px solid"><BR>diff -r 1ca01c82609f -r 8c731f8c71ff source/Lib/TLibEncoder/TEncSearch.cpp<BR>--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Nov 11 15:46:00 2013 +0530<BR>+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Nov 12 16:14:09 2013 +0800<BR>@@ -436,7 +436,7 @@<BR> TCoeff* coeff = m_qtTempCoeffY[qtLayer] + numCoeffPerInc * absPartIdx;<BR><BR> int16_t* reconQt = m_qtTempTComYuv[qtLayer].getLumaAddr(absPartIdx);<BR>- uint32_t reconQtStride = m_qtTempTComYuv[qtLayer].m_width;<BR>+ assert(m_qtTempTComYuv[qtLayer].m_width == MAX_CU_SIZE);<BR><BR> uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;<BR> Pel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);<BR>@@ -502,7 +502,7 @@<BR> }<BR><BR> //===== reconstruction =====<BR>- primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);<BR>+ primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);<BR><BR> //===== update distortion =====<BR> outDist += primitives.sse_pp[part](fenc, stride, recon, stride);<BR>@@ -548,7 +548,7 @@<BR> uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> 2;<BR> TCoeff* coeff = (chromaId > 0 ? m_qtTempCoeffCr[qtlayer] : m_qtTempCoeffCb[qtlayer]) + numCoeffPerInc * absPartIdx;<BR> int16_t* reconQt = (chromaId > 0 ? m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx) : m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx));<BR>- uint32_t reconQtStride = m_qtTempTComYuv[qtlayer].m_cwidth;<BR>+ assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<BR><BR> uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;<BR> Pel* reconIPred = (chromaId > 0 ? 
cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder) : cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder));<BR>@@ -636,7 +636,7 @@<BR> }<BR><BR> //===== reconstruction =====<BR>- primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);<BR>+ primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, MAX_CU_SIZE / 2, reconIPredStride);<BR><BR> //===== update distortion =====<BR> uint32_t dist = primitives.sse_pp[part](fenc, stride, recon, stride);<BR>@@ -954,24 +954,24 @@<BR> uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;<BR> uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;<BR> int16_t* src = m_qtTempTComYuv[qtLayer].getLumaAddr(absPartIdx);<BR>- uint32_t srcstride = m_qtTempTComYuv[qtLayer].m_width;<BR>+ assert(m_qtTempTComYuv[qtLayer].m_width == MAX_CU_SIZE);<BR> Pel* dst = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);<BR> uint32_t dststride = cu->getPic()->getPicYuvRec()->getStride();<BR>- primitives.blockcpy_ps(width, height, dst, dststride, src, srcstride);<BR>+ primitives.blockcpy_ps(width, height, dst, dststride, src, MAX_CU_SIZE);<BR><BR> if (!bLumaOnly)<BR> {<BR> width >>= 1;<BR> height >>= 1;<BR> src = m_qtTempTComYuv[qtLayer].getCbAddr(absPartIdx);<BR>- srcstride = m_qtTempTComYuv[qtLayer].m_cwidth;<BR>+ assert(m_qtTempTComYuv[qtLayer].m_cwidth == MAX_CU_SIZE / 2);<BR> dst = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);<BR> dststride = cu->getPic()->getPicYuvRec()->getCStride();<BR>- primitives.blockcpy_ps(width, height, dst, dststride, src, srcstride);<BR>+ primitives.blockcpy_ps(width, height, dst, dststride, src, MAX_CU_SIZE / 2);<BR><BR> src = m_qtTempTComYuv[qtLayer].getCrAddr(absPartIdx);<BR> dst = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);<BR>- primitives.blockcpy_ps(width, height, dst, dststride, src, srcstride);<BR>+ primitives.blockcpy_ps(width, 
height, dst, dststride, src, MAX_CU_SIZE / 2);<BR> }<BR> }<BR><BR>@@ -1134,10 +1134,10 @@<BR> Pel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zOrder);<BR> uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();<BR> int16_t* reconQt = m_qtTempTComYuv[qtlayer].getLumaAddr(absPartIdx);<BR>- uint32_t reconQtStride = m_qtTempTComYuv[qtlayer].m_width;<BR>+ assert(m_qtTempTComYuv[qtlayer].m_width == MAX_CU_SIZE);<BR> uint32_t width = cu->getWidth(0) >> trDepth;<BR> uint32_t height = cu->getHeight(0) >> trDepth;<BR>- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);<BR>+ primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE);<BR><BR> if (!bLumaOnly && !bSkipChroma)<BR> {<BR>@@ -1146,12 +1146,12 @@<BR> reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zOrder);<BR> reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();<BR> reconQt = m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx);<BR>- reconQtStride = m_qtTempTComYuv[qtlayer].m_cwidth;<BR>- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);<BR>+ assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<BR>+ primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);<BR><BR> reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zOrder);<BR> reconQt = m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx);<BR>- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);<BR>+ primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);<BR> }<BR> }<BR><BR>@@ -1255,20 +1255,20 @@<BR> uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;<BR> uint32_t width = cu->getWidth(0) >> (trDepth + 1);<BR> uint32_t height = cu->getHeight(0) >> (trDepth + 1);<BR>- uint32_t reconQtStride = m_qtTempTComYuv[qtlayer].m_cwidth;<BR>+ 
assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<BR> uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();<BR><BR> if (stateU0V1Both2 == 0 || stateU0V1Both2 == 2)<BR> {<BR> Pel* reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);<BR> int16_t* reconQt = m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx);<BR>- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);<BR>+ primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);<BR> }<BR> if (stateU0V1Both2 == 1 || stateU0V1Both2 == 2)<BR> {<BR> Pel* reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);<BR> int16_t* reconQt = m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx);<BR>- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);<BR>+ primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);<BR> }<BR> }<BR> }<BR>@@ -3182,10 +3182,10 @@<BR><BR> int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];<BR> assert(scalingListType < 6);<BR>- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, m_qtTempTComYuv[qtlayer].m_width, coeffCurY, trWidth, trHeight, scalingListType, false, lastPosY); //this is for inter mode only<BR>-<BR>- const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx),<BR>- m_qtTempTComYuv[qtlayer].m_width);<BR>+ assert(m_qtTempTComYuv[qtlayer].m_width == MAX_CU_SIZE);<BR>+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, MAX_CU_SIZE, coeffCurY, trWidth, trHeight, scalingListType, false, lastPosY); //this is for inter mode only<BR>+<BR>+ const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx), MAX_CU_SIZE);<BR> 
if (cu->isLosslessCoded(0))<BR> {<BR> distY = nonZeroDistY;<BR>@@ -3227,10 +3227,10 @@<BR> if (!absSumY)<BR> {<BR> int16_t *ptr = m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx);<BR>- const uint32_t stride = m_qtTempTComYuv[qtlayer].m_width;<BR>+ assert(m_qtTempTComYuv[qtlayer].m_width == MAX_CU_SIZE);<BR><BR> assert(trWidth == trHeight);<BR>- primitives.blockfill_s[(int)g_convertToBit[trWidth]](ptr, stride, 0);<BR>+ primitives.blockfill_s[(int)g_convertToBit[trWidth]](ptr, MAX_CU_SIZE, 0);<BR> }<BR><BR> uint32_t distU = 0;<BR>@@ -3254,11 +3254,12 @@<BR><BR> int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];<BR> assert(scalingListType < 6);<BR>- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, pcResiCurrU, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurU, trWidthC, trHeightC, scalingListType, false, lastPosU);<BR>+ assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<BR>+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, pcResiCurrU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC, scalingListType, false, lastPosU);<BR><BR> uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth,<BR> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC),<BR>- m_qtTempTComYuv[qtlayer].m_cwidth);<BR>+ MAX_CU_SIZE / 2);<BR> const uint32_t nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);<BR><BR> if (cu->isLosslessCoded(0))<BR>@@ -3301,10 +3302,10 @@<BR> if (!absSumU)<BR> {<BR> int16_t *ptr = m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);<BR>- const uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;<BR>+ assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<BR><BR> assert(trWidthC == trHeightC);<BR>- primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, stride, 0);<BR>+ primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, MAX_CU_SIZE / 2, 0);<BR> }<BR><BR> distV = m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(absTUPartIdxC), 
resiYuv->m_cwidth, m_tempPel, trWidthC));<BR>@@ -3320,11 +3321,12 @@<BR><BR> int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];<BR> assert(scalingListType < 6);<BR>- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurV, trWidthC, trHeightC, scalingListType, false, lastPosV);<BR>+ assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<BR>+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, MAX_CU_SIZE / 2, coeffCurV, trWidthC, trHeightC, scalingListType, false, lastPosV);<BR><BR> uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth,<BR> m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC),<BR>- m_qtTempTComYuv[qtlayer].m_cwidth);<BR>+ MAX_CU_SIZE / 2);<BR> const uint32_t nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);<BR><BR> if (cu->isLosslessCoded(0))<BR>@@ -3367,10 +3369,10 @@<BR> if (!absSumV)<BR> {<BR> int16_t *ptr = m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);<BR>- const uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;<BR>+ assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<BR><BR> assert(trWidthC == trHeightC);<BR>- primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, stride, 0);<BR>+ primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, MAX_CU_SIZE / 2, 0);<BR> }<BR> }<BR> cu->setCbfSubParts(absSumY ? 
setCbf : 0, TEXT_LUMA, absPartIdx, depth);<BR>@@ -3387,7 +3389,7 @@<BR> UInt64 singleCostY = MAX_INT64;<BR><BR> int16_t *curResiY = m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx);<BR>- uint32_t resiStride = m_qtTempTComYuv[qtlayer].m_width;<BR>+ assert(m_qtTempTComYuv[qtlayer].m_width == MAX_CU_SIZE);<BR><BR> TCoeff bestCoeffY[32 * 32];<BR> memcpy(bestCoeffY, coeffCurY, sizeof(TCoeff) * numSamplesLuma);<BR>@@ -3395,7 +3397,7 @@<BR> int16_t bestResiY[32 * 32];<BR> for (int i = 0; i < trHeight; ++i)<BR> {<BR>- memcpy(bestResiY + i * trWidth, curResiY + i * resiStride, sizeof(int16_t) * trWidth);<BR>+ memcpy(bestResiY + i * trWidth, curResiY + i * MAX_CU_SIZE, sizeof(int16_t) * trWidth);<BR> }<BR><BR> m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);<BR>@@ -3425,12 +3427,13 @@<BR><BR> int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];<BR> assert(scalingListType < 6);<BR>-<BR>- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, m_qtTempTComYuv[qtlayer].m_width, coeffCurY, trWidth, trHeight, scalingListType, true, lastPosTransformSkipY);<BR>+ assert(m_qtTempTComYuv[qtlayer].m_width == MAX_CU_SIZE);<BR>+<BR>+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, MAX_CU_SIZE, coeffCurY, trWidth, trHeight, scalingListType, true, lastPosTransformSkipY);<BR><BR> nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width,<BR> m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx),<BR>- m_qtTempTComYuv[qtlayer].m_width);<BR>+ MAX_CU_SIZE);<BR><BR> singleCostY = m_rdCost->calcRdCost(nonZeroDistY, skipSingleBitsY);<BR> }<BR>@@ -3441,7 +3444,7 @@<BR> memcpy(coeffCurY, bestCoeffY, sizeof(TCoeff) * numSamplesLuma);<BR> for (int i = 0; i < trHeight; ++i)<BR> {<BR>- memcpy(curResiY + i * resiStride, &bestResiY[i * trWidth], sizeof(int16_t) * trWidth);<BR>+ memcpy(curResiY + i * MAX_CU_SIZE, &bestResiY[i * trWidth], sizeof(int16_t) * trWidth);<BR> }<BR> }<BR> 
else<BR>@@ -3463,7 +3466,7 @@<BR><BR> int16_t *curResiU = m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);<BR> int16_t *curResiV = m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);<BR>- uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;<BR>+ assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<BR><BR> TCoeff bestCoeffU[32 * 32], bestCoeffV[32 * 32];<BR> memcpy(bestCoeffU, coeffCurU, sizeof(TCoeff) * numSamplesChroma);<BR>@@ -3472,8 +3475,8 @@<BR> int16_t bestResiU[32 * 32], bestResiV[32 * 32];<BR> for (int i = 0; i < trHeightC; ++i)<BR> {<BR>- memcpy(&bestResiU[i * trWidthC], curResiU + i * stride, sizeof(int16_t) * trWidthC);<BR>- memcpy(&bestResiV[i * trWidthC], curResiV + i * stride, sizeof(int16_t) * trWidthC);<BR>+ memcpy(&bestResiU[i * trWidthC], curResiU + i * (MAX_CU_SIZE / 2), sizeof(int16_t) * trWidthC);<BR>+ memcpy(&bestResiV[i * trWidthC], curResiV + i * (MAX_CU_SIZE / 2), sizeof(int16_t) * trWidthC);<BR> }<BR><BR> m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);<BR>@@ -3515,12 +3518,13 @@<BR><BR> int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];<BR> assert(scalingListType < 6);<BR>-<BR>- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiU, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurU, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipU);<BR>+ assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<BR>+<BR>+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipU);<BR><BR> uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth,<BR> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC),<BR>- m_qtTempTComYuv[qtlayer].m_cwidth);<BR>+ MAX_CU_SIZE / 2);<BR> nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);<BR> singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsU);<BR> }<BR>@@ -3532,7 +3536,7 @@<BR> 
memcpy(coeffCurU, bestCoeffU, sizeof(TCoeff) * numSamplesChroma);<BR> for (int i = 0; i < trHeightC; ++i)<BR> {<BR>- memcpy(curResiU + i * stride, &bestResiU[i * trWidthC], sizeof(int16_t) * trWidthC);<BR>+ memcpy(curResiU + i * (MAX_CU_SIZE / 2), &bestResiU[i * trWidthC], sizeof(int16_t) * trWidthC);<BR> }<BR> }<BR> else<BR>@@ -3553,12 +3557,13 @@<BR><BR> int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];<BR> assert(scalingListType < 6);<BR>-<BR>- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurV, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipV);<BR>+ assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<BR>+<BR>+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, MAX_CU_SIZE / 2, coeffCurV, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipV);<BR><BR> uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth,<BR> m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC),<BR>- m_qtTempTComYuv[qtlayer].m_cwidth);<BR>+ MAX_CU_SIZE / 2);<BR> nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);<BR> singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsV);<BR> }<BR>@@ -3570,7 +3575,7 @@<BR> memcpy(coeffCurV, bestCoeffV, sizeof(TCoeff) * numSamplesChroma);<BR> for (int i = 0; i < trHeightC; ++i)<BR> {<BR>- memcpy(curResiV + i * stride, &bestResiV[i * trWidthC], sizeof(int16_t) * trWidthC);<BR>+ memcpy(curResiV + i * (MAX_CU_SIZE / 2), &bestResiV[i * trWidthC], sizeof(int16_t) * trWidthC);<BR> }<BR> }<BR> else<BR>diff -r 1ca01c82609f -r 8c731f8c71ff source/common/TShortYUV.cpp<BR>--- a/source/common/TShortYUV.cpp Mon Nov 11 15:46:00 2013 +0530<BR>+++ b/source/common/TShortYUV.cpp Tue Nov 12 16:14:09 2013 +0800<BR>@@ -123,10 +123,6 @@<BR> addClipChroma(srcYuv0, srcYuv1, trUnitIdx, partSize >> m_hChromaShift);<BR> }<BR><BR>-#if _MSC_VER<BR>-#pragma warning (disable: 
4244)<BR>-#endif<BR>-<BR> void TShortYUV::addClipLuma(TShortYUV* srcYuv0, TShortYUV* srcYuv1, unsigned int trUnitIdx, unsigned int partSize)<BR> {<BR> int16_t* src0 = srcYuv0->getLumaAddr(trUnitIdx, partSize);<BR><BR>_______________________________________________<BR>x265-devel mailing list<BR><A href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</A><BR><A href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</A><BR></BLOCKQUOTE></DIV><BR><BR clear="all">
<DIV><BR></DIV>-- <BR>Steve Borho </DIV></DIV></BLOCKQUOTE></div>