<div dir="ltr">Thanks Satoshi. Pushed for testing.<br></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Mon, Jun 2, 2014 at 8:17 AM, Satoshi Nakagawa <span dir="ltr">&lt;<a href="mailto:nakagawa424@oki.com" target="_blank">nakagawa424@oki.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Satoshi Nakagawa &lt;<a href="mailto:nakagawa424@oki.com">nakagawa424@oki.com</a>&gt;<br>
# Date 1401677099 -32400<br>
# Mon Jun 02 11:44:59 2014 +0900<br>
# Node ID 73f86312c2e0aa5a105e84b0045478e02c8a03e7<br>
# Parent a5998df9b12ef81e48e7c5b89219a74276a75f27<br>
refine cbf==0 path: remove clearing coeff and resi<br>
<br>
diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncEntropy.cpp<br>
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp Mon Jun 02 07:36:20 2014 +0530<br>
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Mon Jun 02 11:44:59 2014 +0900<br>
@@ -202,7 +202,6 @@<br>
<br>
void TEncEntropy::initTUEntropySection(TComTURecurse *tuIterator, uint32_t splitMode, uint32_t absPartIdxStep, uint32_t m_absPartIdxTU)<br>
{<br>
- tuIterator->m_partOffset = 0;<br>
tuIterator->m_section = 0;<br>
tuIterator->m_absPartIdxTURelCU = m_absPartIdxTU;<br>
tuIterator->m_splitMode = splitMode;<br>
diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncEntropy.h<br>
--- a/source/Lib/TLibEncoder/TEncEntropy.h Mon Jun 02 07:36:20 2014 +0530<br>
+++ b/source/Lib/TLibEncoder/TEncEntropy.h Mon Jun 02 11:44:59 2014 +0900<br>
@@ -66,7 +66,6 @@<br>
uint32_t m_splitMode;<br>
uint32_t m_absPartIdxTURelCU;<br>
uint32_t m_absPartIdxStep;<br>
- uint32_t m_partOffset;<br>
};<br>
<br>
// ====================================================================================================================<br>
diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncSbac.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSbac.cpp Mon Jun 02 07:36:20 2014 +0530<br>
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp Mon Jun 02 11:44:59 2014 +0900<br>
@@ -2120,8 +2120,9 @@<br>
// compute number of significant coefficients<br>
uint32_t numSig = primitives.count_nonzero(coeff, trSize * trSize);<br>
<br>
- if (numSig == 0)<br>
- return;<br>
+#if CHECKED_BUILD || _DEBUG<br>
+ X265_CHECK(numSig > 0, "cbf check fail");<br>
+#endif<br>
<br>
bool beValid;<br>
if (cu->getCUTransquantBypass(absPartIdx))<br>
diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncSearch.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 02 07:36:20 2014 +0530<br>
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 02 11:44:59 2014 +0900<br>
@@ -408,8 +408,8 @@<br>
coeff_t* coeff = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;<br>
<br>
int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);<br>
-<br>
X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");<br>
+ const uint32_t reconQtStride = MAX_CU_SIZE;<br>
<br>
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;<br>
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);<br>
@@ -443,25 +443,29 @@<br>
//--- set coded block flag ---<br>
cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);<br>
<br>
- //--- inverse transform ---<br>
if (absSum)<br>
{<br>
+ //--- inverse transform ---<br>
int scalingListType = 0 + TEXT_LUMA;<br>
- X265_CHECK(scalingListType < 6, "scalingListType is too large %d\n", scalingListType);<br>
+ X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);<br>
m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);<br>
+ X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);<br>
+ //===== reconstruction =====<br>
+ primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);<br>
+ //===== update distortion =====<br>
+ outDist += primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);<br>
}<br>
else<br>
{<br>
- int16_t* resiTmp = residual;<br>
+#if CHECKED_BUILD || _DEBUG<br>
memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);<br>
- primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);<br>
+#endif<br>
+ //===== reconstruction =====<br>
+ primitives.luma_copy_ps[part](reconQt, reconQtStride, pred, stride);<br>
+ primitives.luma_copy_pp[part](reconIPred, reconIPredStride, pred, stride);<br>
+ //===== update distortion =====<br>
+ outDist += primitives.sse_pp[part](pred, stride, fenc, stride);<br>
}<br>
-<br>
- X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);<br>
- //===== reconstruction =====<br>
- primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);<br>
- //===== update distortion =====<br>
- outDist += primitives.sse_sp[part](reconQt, MAX_CU_SIZE, fenc, stride);<br>
}<br>
<br>
void TEncSearch::xIntraCodingChromaBlk(TComDataCU* cu,<br>
@@ -519,67 +523,67 @@<br>
primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);<br>
<br>
//===== transform and quantization =====<br>
+ //--- init rate estimation arrays for RDOQ ---<br>
+ if (useTransformSkipChroma ? m_cfg->bEnableRDOQTS : m_cfg->bEnableRDOQ)<br>
{<br>
- //--- init rate estimation arrays for RDOQ ---<br>
- if (useTransformSkipChroma ? m_cfg->bEnableRDOQTS : m_cfg->bEnableRDOQ)<br>
- {<br>
- m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, tuSize, ttype);<br>
- }<br>
- //--- transform and quantization ---<br>
- uint32_t absSum = 0;<br>
- int lastPos = -1;<br>
-<br>
- int curChromaQpOffset;<br>
- if (ttype == TEXT_CHROMA_U)<br>
- {<br>
- curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();<br>
- }<br>
- else<br>
- {<br>
- curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
- }<br>
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);<br>
-<br>
- m_trQuant->selectLambda(TEXT_CHROMA);<br>
-<br>
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipChroma);<br>
-<br>
- //--- set coded block flag ---<br>
- cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdx, absPartIdxStep);<br>
-<br>
+ m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, tuSize, ttype);<br>
+ }<br>
+<br>
+ //--- transform and quantization ---<br>
+ uint32_t absSum = 0;<br>
+ int lastPos = -1;<br>
+<br>
+ int curChromaQpOffset;<br>
+ if (ttype == TEXT_CHROMA_U)<br>
+ {<br>
+ curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();<br>
+ }<br>
+ else<br>
+ {<br>
+ curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
+ }<br>
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);<br>
+ m_trQuant->selectLambda(TEXT_CHROMA);<br>
+<br>
+ absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipChroma);<br>
+<br>
+ //--- set coded block flag ---<br>
+ cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdx, absPartIdxStep);<br>
+<br>
+ uint32_t dist;<br>
+ if (absSum)<br>
+ {<br>
//--- inverse transform ---<br>
- if (absSum)<br>
- {<br>
- int scalingListType = 0 + ttype;<br>
- X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);<br>
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);<br>
- }<br>
- else<br>
- {<br>
- int16_t* resiTmp = residual;<br>
- memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);<br>
- primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);<br>
- }<br>
+ int scalingListType = 0 + ttype;<br>
+ X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);<br>
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);<br>
+ X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);<br>
+ //===== reconstruction =====<br>
+ primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);<br>
+ //===== update distortion =====<br>
+ dist = primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);<br>
}<br>
-<br>
- X265_CHECK(((intptr_t)residual & (tuSize - 1)) == 0, "residual alignment check failure\n");<br>
- X265_CHECK(tuSize <= 32, "tuSize invalud\n");<br>
- //===== reconstruction =====<br>
- primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);<br>
- //===== update distortion =====<br>
- uint32_t dist = primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);<br>
+ else<br>
+ {<br>
+#if CHECKED_BUILD || _DEBUG<br>
+ memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);<br>
+#endif<br>
+ //===== reconstruction =====<br>
+ primitives.square_copy_ps[sizeIdx](reconQt, reconQtStride, pred, stride);<br>
+ primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, pred, stride);<br>
+ //===== update distortion =====<br>
+ dist = primitives.sse_pp[part](pred, stride, fenc, stride);<br>
+ }<br>
+<br>
+ X265_CHECK(ttype == TEXT_CHROMA_U || ttype == TEXT_CHROMA_V, "invalid ttype\n");<br>
if (ttype == TEXT_CHROMA_U)<br>
{<br>
outDist += m_rdCost->scaleChromaDistCb(dist);<br>
}<br>
- else if (ttype == TEXT_CHROMA_V)<br>
+ else<br>
{<br>
outDist += m_rdCost->scaleChromaDistCr(dist);<br>
}<br>
- else<br>
- {<br>
- outDist += dist;<br>
- }<br>
}<br>
<br>
void TEncSearch::xRecurIntraCodingQT(TComDataCU* cu,<br>
@@ -784,15 +788,15 @@<br>
cu->setTransformSkipSubParts(bestModeId, TEXT_LUMA, absPartIdx, fullDepth);<br>
<br>
//--- set reconstruction for next intra prediction blocks ---<br>
- uint32_t width = cu->getCUSize(0) >> trDepth;<br>
- uint32_t height = cu->getCUSize(0) >> trDepth;<br>
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;<br>
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;<br>
int16_t* src = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);<br>
X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");<br>
+ const uint32_t srcstride = MAX_CU_SIZE;<br>
pixel* dst = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);<br>
uint32_t dststride = cu->getPic()->getPicYuvRec()->getStride();<br>
- primitives.blockcpy_ps(width, height, dst, dststride, src, MAX_CU_SIZE);<br>
+ int sizeIdx = trSizeLog2 - 2;<br>
+ primitives.square_copy_sp[sizeIdx](dst, dststride, src, srcstride);<br>
}<br>
<br>
outDistY += singleDistY;<br>
@@ -866,25 +870,29 @@<br>
//--- set coded block flag ---<br>
cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);<br>
<br>
- //--- inverse transform ---<br>
+ int part = partitionFromSize(tuSize);<br>
+<br>
if (absSum)<br>
{<br>
+ //--- inverse transform ---<br>
int scalingListType = 0 + TEXT_LUMA;<br>
X265_CHECK(scalingListType < 6, "scalingListType %d\n", scalingListType);<br>
m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);<br>
+<br>
+ // Generate Recon<br>
+ primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);<br>
+ primitives.luma_copy_pp[part](reconIPred, reconIPredStride, recon, stride);<br>
}<br>
else<br>
{<br>
- int16_t* resiTmp = residual;<br>
+#if CHECKED_BUILD || _DEBUG<br>
memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);<br>
- primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);<br>
+#endif<br>
+<br>
+ // Generate Recon<br>
+ primitives.luma_copy_pp[part](recon, stride, pred, stride);<br>
+ primitives.luma_copy_pp[part](reconIPred, reconIPredStride, pred, stride);<br>
}<br>
-<br>
- //Generate Recon<br>
- X265_CHECK(tuSize <= 32, "tuSize is too large\n");<br>
- int part = partitionFromSize(tuSize);<br>
- primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);<br>
- primitives.blockcpy_pp(tuSize, tuSize, reconIPred, reconIPredStride, recon, stride);<br>
}<br>
<br>
if (bCheckSplit && !bCheckFull)<br>
@@ -980,8 +988,10 @@<br>
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zOrder);<br>
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();<br>
int16_t* reconQt = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);<br>
- primitives.blockcpy_ps(trSize, trSize, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE);<br>
X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");<br>
+ const uint32_t reconQtStride = MAX_CU_SIZE;<br>
+ int sizeIdx = trSizeLog2 - 2;<br>
+ primitives.square_copy_sp[sizeIdx](reconIPred, reconIPredStride, reconQt, reconQtStride);<br>
}<br>
<br>
void TEncSearch::xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)<br>
@@ -1059,8 +1069,7 @@<br>
}<br>
<br>
//===== copy transform coefficients =====<br>
- uint32_t trSizeC = 1 << trSizeCLog2;<br>
- uint32_t numCoeffC = 1 << trSizeCLog2 * 2;<br>
+ uint32_t numCoeffC = 1 << (trSizeCLog2 * 2);<br>
uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));<br>
<br>
coeff_t* coeffDst = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;<br>
@@ -1072,12 +1081,13 @@<br>
m_qtTempTransformSkipYuv.copyPartToPartChroma(&m_qtTempShortYuv[qtlayer], absPartIdx, lumaSize, chromaId, splitIntoSubTUs);<br>
<br>
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;<br>
- uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;<br>
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();<br>
<br>
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);<br>
int16_t* reconQt = m_qtTempShortYuv[qtlayer].getChromaAddr(chromaId, absPartIdx);<br>
- primitives.blockcpy_ps(trSizeC, trSizeC, reconIPred, reconIPredStride, reconQt, reconQtStride);<br>
+ uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;<br>
+ int sizeIdxC = trSizeCLog2 - 2;<br>
+ primitives.square_copy_sp[sizeIdxC](reconIPred, reconIPredStride, reconQt, reconQtStride);<br>
}<br>
}<br>
<br>
@@ -1387,6 +1397,7 @@<br>
uint32_t stride = fencYuv->getCStride();<br>
const bool splitIntoSubTUs = (chFmt == CHROMA_422);<br>
int sizeIdx = g_convertToBit[tuSize];<br>
+ int part = partitionFromSize(tuSize);<br>
<br>
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)<br>
{<br>
@@ -1456,28 +1467,28 @@<br>
//--- set coded block flag ---<br>
cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
<br>
- //--- inverse transform ---<br>
if (absSum)<br>
{<br>
+ //--- inverse transform ---<br>
int scalingListType = 0 + ttype;<br>
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);<br>
m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);<br>
+<br>
+ //===== reconstruction =====<br>
+ // use square primitives<br>
+ primitives.chroma[CHROMA_444].add_ps[part](recon, stride, pred, residual, stride, stride);<br>
+ primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, recon, stride);<br>
}<br>
else<br>
{<br>
- int16_t* resiTmp = residual;<br>
+#if CHECKED_BUILD || _DEBUG<br>
memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);<br>
- primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);<br>
+#endif<br>
+<br>
+ //===== reconstruction =====<br>
+ primitives.square_copy_pp[sizeIdx](recon, stride, pred, stride);<br>
+ primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, pred, stride);<br>
}<br>
-<br>
- //===== reconstruction =====<br>
- X265_CHECK(((intptr_t)residual & (tuSize - 1)) == 0, "residual alignment check failed\n");<br>
- X265_CHECK(tuSize <= 32, "tuSize out of range\n");<br>
-<br>
- // use square primitive<br>
- int part = partitionFromSize(tuSize);<br>
- primitives.chroma[CHROMA_444].add_ps[part](recon, stride, pred, residual, stride, stride);<br>
- primitives.chroma[CHROMA_444].copy_pp[part](reconIPred, reconIPredStride, recon, stride);<br>
}<br>
while (isNextSection(&tuIterator));<br>
<br>
@@ -1859,7 +1870,6 @@<br>
<br>
void TEncSearch::initSection(TComTURecurse *tuIterator, uint32_t splitMode, uint32_t absPartIdxStep, uint32_t m_absPartIdxTU)<br>
{<br>
- tuIterator->m_partOffset = 0;<br>
tuIterator->m_section = 0;<br>
tuIterator->m_absPartIdxTURelCU = m_absPartIdxTU;<br>
tuIterator->m_splitMode = splitMode;<br>
@@ -1874,16 +1884,21 @@<br>
{<br>
uint32_t depth = cu->getDepth(0);<br>
uint32_t initTrDepth = (cu->getPartitionSize(0) != SIZE_2Nx2N) && (cu->getChromaFormat() == CHROMA_444 ? 1 : 0);<br>
-<br>
+ uint32_t tuSize = cu->getCUSize(0) >> initTrDepth;<br>
uint32_t splitMode = (initTrDepth == 0) ? DONT_SPLIT : QUAD_SPLIT;<br>
uint32_t absPartIdx = (cu->getPic()->getNumPartInCU() >> (depth << 1));<br>
<br>
+ int chFmt = cu->getChromaFormat();<br>
+ int part = partitionFromSize(tuSize);<br>
+<br>
TComTURecurse tuIterator;<br>
<br>
initSection(&tuIterator, splitMode, absPartIdx);<br>
<br>
do<br>
{<br>
+ uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;<br>
+<br>
uint32_t bestMode = 0;<br>
uint32_t bestDist = 0;<br>
uint64_t bestCost = MAX_INT64;<br>
@@ -1893,9 +1908,7 @@<br>
uint32_t maxMode = NUM_CHROMA_MODE;<br>
uint32_t modeList[NUM_CHROMA_MODE];<br>
<br>
- tuIterator.m_partOffset = tuIterator.m_absPartIdxTURelCU;<br>
-<br>
- cu->getAllowedChromaDir(tuIterator.m_partOffset, modeList);<br>
+ cu->getAllowedChromaDir(absPartIdxC, modeList);<br>
<br>
//----- check chroma modes -----<br>
for (uint32_t mode = minMode; mode < maxMode; mode++)<br>
@@ -1906,16 +1919,16 @@<br>
//----- chroma coding -----<br>
uint32_t dist = 0;<br>
<br>
- cu->setChromIntraDirSubParts(modeList[mode], tuIterator.m_partOffset, depth + initTrDepth);<br>
-<br>
- xRecurIntraChromaCodingQT(cu, initTrDepth, tuIterator.m_absPartIdxTURelCU, fencYuv, predYuv, resiYuv, dist);<br>
+ cu->setChromIntraDirSubParts(modeList[mode], absPartIdxC, depth + initTrDepth);<br>
+<br>
+ xRecurIntraChromaCodingQT(cu, initTrDepth, absPartIdxC, fencYuv, predYuv, resiYuv, dist);<br>
<br>
if (cu->getSlice()->getPPS()->getUseTransformSkip())<br>
{<br>
m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);<br>
}<br>
<br>
- uint32_t bits = xGetIntraBitsQT(cu, initTrDepth, tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep, false, true);<br>
+ uint32_t bits = xGetIntraBitsQT(cu, initTrDepth, absPartIdxC, tuIterator.m_absPartIdxStep, false, true);<br>
uint64_t cost = m_rdCost->calcRdCost(dist, bits);<br>
<br>
//----- compare -----<br>
@@ -1924,37 +1937,36 @@<br>
bestCost = cost;<br>
bestDist = dist;<br>
bestMode = modeList[mode];<br>
- xSetIntraResultChromaQT(cu, initTrDepth, tuIterator.m_absPartIdxTURelCU, reconYuv);<br>
- ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U) + tuIterator.m_partOffset, tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
- ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V) + tuIterator.m_partOffset, tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
- ::memcpy(m_qtTempTransformSkipFlag[1], cu->getTransformSkip(TEXT_CHROMA_U) + tuIterator.m_partOffset, tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
- ::memcpy(m_qtTempTransformSkipFlag[2], cu->getTransformSkip(TEXT_CHROMA_V) + tuIterator.m_partOffset, tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
+ xSetIntraResultChromaQT(cu, initTrDepth, absPartIdxC, reconYuv);<br>
+ ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U) + absPartIdxC, tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
+ ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V) + absPartIdxC, tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
+ ::memcpy(m_qtTempTransformSkipFlag[1], cu->getTransformSkip(TEXT_CHROMA_U) + absPartIdxC, tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
+ ::memcpy(m_qtTempTransformSkipFlag[2], cu->getTransformSkip(TEXT_CHROMA_V) + absPartIdxC, tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
}<br>
}<br>
<br>
if (!isLastSection(&tuIterator))<br>
{<br>
- uint32_t compWidth = (cu->getCUSize(0) >> m_hChromaShift) >> initTrDepth;<br>
- uint32_t compHeight = (cu->getCUSize(0) >> m_vChromaShift) >> initTrDepth;<br>
- uint32_t zorder = cu->getZorderIdxInCU() + tuIterator.m_partOffset;<br>
- pixel* dst = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);<br>
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;<br>
uint32_t dststride = cu->getPic()->getPicYuvRec()->getCStride();<br>
- pixel* src = reconYuv->getCbAddr(tuIterator.m_partOffset);<br>
uint32_t srcstride = reconYuv->getCStride();<br>
-<br>
- primitives.blockcpy_pp(compWidth, compHeight, dst, dststride, src, srcstride);<br>
-<br>
- dst = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);<br>
- src = reconYuv->getCrAddr(tuIterator.m_partOffset);<br>
- primitives.blockcpy_pp(compWidth, compHeight, dst, dststride, src, srcstride);<br>
+ pixel *src, *dst;<br>
+<br>
+ dst = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);<br>
+ src = reconYuv->getCbAddr(absPartIdxC);<br>
+ primitives.chroma[chFmt].copy_pp[part](dst, dststride, src, srcstride);<br>
+<br>
+ dst = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);<br>
+ src = reconYuv->getCrAddr(absPartIdxC);<br>
+ primitives.chroma[chFmt].copy_pp[part](dst, dststride, src, srcstride);<br>
}<br>
<br>
//----- set data -----<br>
- ::memcpy(cu->getCbf(TEXT_CHROMA_U) + tuIterator.m_partOffset, m_qtTempCbf[1], tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
- ::memcpy(cu->getCbf(TEXT_CHROMA_V) + tuIterator.m_partOffset, m_qtTempCbf[2], tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
- ::memcpy(cu->getTransformSkip(TEXT_CHROMA_U) + tuIterator.m_partOffset, m_qtTempTransformSkipFlag[1], tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
- ::memcpy(cu->getTransformSkip(TEXT_CHROMA_V) + tuIterator.m_partOffset, m_qtTempTransformSkipFlag[2], tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
- cu->setChromIntraDirSubParts(bestMode, tuIterator.m_partOffset, depth + initTrDepth);<br>
+ ::memcpy(cu->getCbf(TEXT_CHROMA_U) + absPartIdxC, m_qtTempCbf[1], tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
+ ::memcpy(cu->getCbf(TEXT_CHROMA_V) + absPartIdxC, m_qtTempCbf[2], tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
+ ::memcpy(cu->getTransformSkip(TEXT_CHROMA_U) + absPartIdxC, m_qtTempTransformSkipFlag[1], tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
+ ::memcpy(cu->getTransformSkip(TEXT_CHROMA_V) + absPartIdxC, m_qtTempTransformSkipFlag[2], tuIterator.m_absPartIdxStep * sizeof(uint8_t));<br>
+ cu->setChromIntraDirSubParts(bestMode, absPartIdxC, depth + initTrDepth);<br>
cu->m_totalDistortion += bestDist;<br>
}<br>
while (isNextSection(&tuIterator));<br>
@@ -2685,9 +2697,11 @@<br>
::memset(cu->getCbf(TEXT_LUMA), 0, qpartnum * sizeof(uint8_t));<br>
::memset(cu->getCbf(TEXT_CHROMA_U), 0, qpartnum * sizeof(uint8_t));<br>
::memset(cu->getCbf(TEXT_CHROMA_V), 0, qpartnum * sizeof(uint8_t));<br>
+#if CHECKED_BUILD || _DEBUG<br>
::memset(cu->getCoeffY(), 0, cuSize * cuSize * sizeof(coeff_t));<br>
::memset(cu->getCoeffCb(), 0, cuSize * cuSize * sizeof(coeff_t) >> (m_hChromaShift + m_vChromaShift));<br>
::memset(cu->getCoeffCr(), 0, cuSize * cuSize * sizeof(coeff_t) >> (m_hChromaShift + m_vChromaShift));<br>
+#endif<br>
cu->setTransformSkipSubParts(0, 0, 0, 0, cu->getDepth(0));<br>
}<br>
else<br>
@@ -2841,25 +2855,26 @@<br>
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);<br>
m_trQuant->selectLambda(TEXT_LUMA);<br>
<br>
- absSumY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,<br>
+ int16_t *curResiY = resiYuv->getLumaAddr(absPartIdx);<br>
+ const uint32_t strideResiY = resiYuv->m_width;<br>
+ const uint32_t strideResiC = resiYuv->m_cwidth;<br>
+<br>
+ absSumY = m_trQuant->transformNxN(cu, curResiY, strideResiY, coeffCurY,<br>
trSize, TEXT_LUMA, absPartIdx, &lastPosY, false, curuseRDOQ);<br>
<br>
cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);<br>
<br>
if (absSumY)<br>
{<br>
- int16_t *curResiY = resiYuv->getLumaAddr(absPartIdx);<br>
-<br>
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);<br>
<br>
int scalingListType = 3 + TEXT_LUMA;<br>
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);<br>
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, resiYuv->m_width, coeffCurY, trSize, scalingListType, false, lastPosY); //this is for inter mode only<br>
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, strideResiY, coeffCurY, trSize, scalingListType, false, lastPosY); //this is for inter mode only<br>
}<br>
else<br>
{<br>
- int16_t *ptr = resiYuv->getLumaAddr(absPartIdx);<br>
- primitives.blockfill_s[sizeIdx](ptr, resiYuv->m_width, 0);<br>
+ primitives.blockfill_s[sizeIdx](curResiY, strideResiY, 0);<br>
}<br>
cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);<br>
<br>
@@ -2873,6 +2888,9 @@<br>
uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;<br>
uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;<br>
<br>
+ int16_t *curResiU = resiYuv->getCbAddr(absPartIdxC);<br>
+ int16_t *curResiV = resiYuv->getCrAddr(absPartIdxC);<br>
+<br>
cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
<br>
@@ -2881,12 +2899,12 @@<br>
<br>
m_trQuant->selectLambda(TEXT_CHROMA);<br>
<br>
- absSumU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,<br>
+ absSumU = m_trQuant->transformNxN(cu, curResiU, strideResiC, coeffCurU + subTUBufferOffset,<br>
trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPosU, false, curuseRDOQ);<br>
<br>
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);<br>
- absSumV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,<br>
+ absSumV = m_trQuant->transformNxN(cu, curResiV, strideResiC, coeffCurV + subTUBufferOffset,<br>
trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPosV, false, curuseRDOQ);<br>
<br>
cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
@@ -2894,34 +2912,29 @@<br>
<br>
if (absSumU)<br>
{<br>
- int16_t *pcResiCurrU = resiYuv->getCbAddr(absPartIdxC);<br>
-<br>
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();<br>
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);<br>
<br>
int scalingListType = 3 + TEXT_CHROMA_U;<br>
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);<br>
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, pcResiCurrU, resiYuv->m_cwidth, coeffCurU + subTUBufferOffset, trSizeC, scalingListType, false, lastPosU);<br>
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUBufferOffset, trSizeC, scalingListType, false, lastPosU);<br>
}<br>
else<br>
{<br>
- int16_t *ptr = resiYuv->getCbAddr(absPartIdxC);<br>
- primitives.blockfill_s[sizeIdxC](ptr, resiYuv->m_cwidth, 0);<br>
+ primitives.blockfill_s[sizeIdxC](curResiU, strideResiC, 0);<br>
}<br>
if (absSumV)<br>
{<br>
- int16_t *curResiV = resiYuv->getCrAddr(absPartIdxC);<br>
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);<br>
<br>
int scalingListType = 3 + TEXT_CHROMA_V;<br>
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);<br>
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, resiYuv->m_cwidth, coeffCurV + subTUBufferOffset, trSizeC, scalingListType, false, lastPosV);<br>
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUBufferOffset, trSizeC, scalingListType, false, lastPosV);<br>
}<br>
else<br>
{<br>
- int16_t *ptr = resiYuv->getCrAddr(absPartIdxC);<br>
- primitives.blockfill_s[sizeIdxC](ptr, resiYuv->m_cwidth, 0);<br>
+ primitives.blockfill_s[sizeIdxC](curResiV, strideResiC, 0);<br>
}<br>
cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
cu->setCbfPartRange(absSumV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
@@ -3027,6 +3040,8 @@<br>
if (bCheckFull)<br>
{<br>
uint32_t trSizeC = 1 << trSizeCLog2;<br>
+ int sizeIdx = trSizeLog2 - 2;<br>
+ int sizeIdxC = trSizeCLog2 - 2;<br>
const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;<br>
uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;<br>
uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);<br>
@@ -3070,7 +3085,7 @@<br>
do<br>
{<br>
uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;<br>
- uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;<br>
+ uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;<br>
<br>
cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
@@ -3112,7 +3127,8 @@<br>
while (isNextSection(&tuIterator));<br>
}<br>
<br>
- const uint32_t numSamplesLuma = 1 << (trSizeLog2 << 1);<br>
+ const uint32_t numCoeffY = 1 << (trSizeLog2 * 2);<br>
+ const uint32_t numCoeffC = 1 << (trSizeCLog2 * 2);<br>
<br>
for (uint32_t subTUIndex = 0; subTUIndex < 2; subTUIndex++)<br>
{<br>
@@ -3123,6 +3139,10 @@<br>
<br>
int partSize = partitionFromSize(trSize);<br>
uint32_t distY = primitives.sse_sp[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, (pixel*)RDCost::zeroPel, trSize);<br>
+ int16_t *curResiY = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);<br>
+ X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width not full CU\n");<br>
+ const uint32_t strideResiY = MAX_CU_SIZE;<br>
+ const uint32_t strideResiC = m_qtTempShortYuv[qtlayer].m_cwidth;<br>
<br>
if (outZeroDist)<br>
{<br>
@@ -3130,16 +3150,13 @@<br>
}<br>
if (absSum[TEXT_LUMA][0])<br>
{<br>
- int16_t *curResiY = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);<br>
-<br>
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);<br>
<br>
int scalingListType = 3 + TEXT_LUMA;<br>
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);<br>
- X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width not full CU\n");<br>
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, MAX_CU_SIZE, coeffCurY, trSize, scalingListType, false, lastPos[TEXT_LUMA][0]); //this is for inter mode only<br>
-<br>
- const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx), MAX_CU_SIZE);<br>
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, strideResiY, coeffCurY, trSize, scalingListType, false, lastPos[TEXT_LUMA][0]); //this is for inter mode only<br>
+<br>
+ const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, curResiY, strideResiY);<br>
if (cu->isLosslessCoded(0))<br>
{<br>
distY = nonZeroDistY;<br>
@@ -3154,7 +3171,9 @@<br>
if (nullCostY < singleCostY)<br>
{<br>
absSum[TEXT_LUMA][0] = 0;<br>
- ::memset(coeffCurY, 0, sizeof(coeff_t) * numSamplesLuma);<br>
+#if CHECKED_BUILD || _DEBUG<br>
+ ::memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY);<br>
+#endif<br>
if (checkTransformSkipY)<br>
{<br>
minCost[TEXT_LUMA][0] = nullCostY;<br>
@@ -3182,10 +3201,7 @@<br>
<br>
if (!absSum[TEXT_LUMA][0])<br>
{<br>
- int16_t *ptr = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);<br>
- X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width not full CU\n");<br>
- int sizeIdx = trSizeLog2 - 2;<br>
- primitives.blockfill_s[sizeIdx](ptr, MAX_CU_SIZE, 0);<br>
+ primitives.blockfill_s[sizeIdx](curResiY, strideResiY, 0);<br>
}<br>
cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);<br>
<br>
@@ -3197,13 +3213,15 @@<br>
initSection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);<br>
<br>
int partSizeC = partitionFromSize(trSizeC);<br>
- const uint32_t numSamplesChroma = trSizeC * trSizeC;<br>
<br>
do<br>
{<br>
uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;<br>
uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;<br>
<br>
+ int16_t *curResiU = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);<br>
+ int16_t *curResiV = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);<br>
+<br>
distU = m_rdCost->scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, (pixel*)RDCost::zeroPel, trSizeC));<br>
<br>
if (outZeroDist)<br>
@@ -3212,18 +3230,15 @@<br>
}<br>
if (absSum[TEXT_CHROMA_U][tuIterator.m_section])<br>
{<br>
- int16_t *pcResiCurrU = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);<br>
-<br>
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();<br>
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);<br>
<br>
int scalingListType = 3 + TEXT_CHROMA_U;<br>
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);<br>
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, pcResiCurrU, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurU + subTUBufferOffset,<br>
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUBufferOffset,<br>
trSizeC, scalingListType, false, lastPos[TEXT_CHROMA_U][tuIterator.m_section]);<br>
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,<br>
- m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC),<br>
- m_qtTempShortYuv[qtlayer].m_cwidth);<br>
+ curResiU, strideResiC);<br>
const uint32_t nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);<br>
<br>
if (cu->isLosslessCoded(0))<br>
@@ -3240,7 +3255,9 @@<br>
if (nullCostU < singleCostU)<br>
{<br>
absSum[TEXT_CHROMA_U][tuIterator.m_section] = 0;<br>
- ::memset(coeffCurU + subTUBufferOffset, 0, sizeof(coeff_t) * numSamplesChroma);<br>
+#if CHECKED_BUILD || _DEBUG<br>
+ ::memset(coeffCurU + subTUBufferOffset, 0, sizeof(coeff_t) * numCoeffC);<br>
+#endif<br>
if (checkTransformSkipUV)<br>
{<br>
minCost[TEXT_CHROMA_U][tuIterator.m_section] = nullCostU;<br>
@@ -3268,10 +3285,7 @@<br>
<br>
if (!absSum[TEXT_CHROMA_U][tuIterator.m_section])<br>
{<br>
- int16_t *ptr = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);<br>
- const uint32_t stride = m_qtTempShortYuv[qtlayer].m_cwidth;<br>
- int sizeIdxC = trSizeCLog2 - 2;<br>
- primitives.blockfill_s[sizeIdxC](ptr, stride, 0);<br>
+ primitives.blockfill_s[sizeIdxC](curResiU, strideResiC, 0);<br>
}<br>
<br>
distV = m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, (pixel*)RDCost::zeroPel, trSizeC));<br>
@@ -3281,17 +3295,15 @@<br>
}<br>
if (absSum[TEXT_CHROMA_V][tuIterator.m_section])<br>
{<br>
- int16_t *curResiV = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);<br>
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);<br>
<br>
int scalingListType = 3 + TEXT_CHROMA_V;<br>
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);<br>
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurV + subTUBufferOffset,<br>
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUBufferOffset,<br>
trSizeC, scalingListType, false, lastPos[TEXT_CHROMA_V][tuIterator.m_section]);<br>
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,<br>
- m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC),<br>
- m_qtTempShortYuv[qtlayer].m_cwidth);<br>
+ curResiV, strideResiC);<br>
const uint32_t nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);<br>
<br>
if (cu->isLosslessCoded(0))<br>
@@ -3308,7 +3320,9 @@<br>
if (nullCostV < singleCostV)<br>
{<br>
absSum[TEXT_CHROMA_V][tuIterator.m_section] = 0;<br>
- ::memset(coeffCurV + subTUBufferOffset, 0, sizeof(coeff_t) * numSamplesChroma);<br>
+#if CHECKED_BUILD || _DEBUG<br>
+ ::memset(coeffCurV + subTUBufferOffset, 0, sizeof(coeff_t) * numCoeffC);<br>
+#endif<br>
if (checkTransformSkipUV)<br>
{<br>
minCost[TEXT_CHROMA_V][tuIterator.m_section] = nullCostV;<br>
@@ -3336,10 +3350,7 @@<br>
<br>
if (!absSum[TEXT_CHROMA_V][tuIterator.m_section])<br>
{<br>
- int16_t *ptr = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);<br>
- const uint32_t stride = m_qtTempShortYuv[qtlayer].m_cwidth;<br>
- int sizeIdxC = trSizeCLog2 - 2;<br>
- primitives.blockfill_s[sizeIdxC](ptr, stride, 0);<br>
+ primitives.blockfill_s[sizeIdxC](curResiV, strideResiC, 0);<br>
}<br>
<br>
cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
@@ -3354,17 +3365,11 @@<br>
uint32_t nonZeroDistY = 0, absSumTransformSkipY;<br>
uint64_t singleCostY = MAX_INT64;<br>
<br>
- int16_t *curResiY = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);<br>
- X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width not full CU\n");<br>
-<br>
- coeff_t bestCoeffY[32 * 32];<br>
- memcpy(bestCoeffY, coeffCurY, sizeof(coeff_t) * numSamplesLuma);<br>
-<br>
- int16_t bestResiY[32 * 32];<br>
- for (int i = 0; i < trSize; ++i)<br>
- {<br>
- memcpy(bestResiY + i * trSize, curResiY + i * MAX_CU_SIZE, sizeof(int16_t) * trSize);<br>
- }<br>
+ coeff_t bestCoeffY[MAX_TS_SIZE * MAX_TS_SIZE];<br>
+ memcpy(bestCoeffY, coeffCurY, sizeof(coeff_t) * numCoeffY);<br>
+<br>
+ int16_t bestResiY[MAX_TS_SIZE * MAX_TS_SIZE];<br>
+ primitives.square_copy_ss[sizeIdx](bestResiY, trSize, curResiY, strideResiY);<br>
<br>
m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);<br>
<br>
@@ -3393,13 +3398,11 @@<br>
<br>
int scalingListType = 3 + TEXT_LUMA;<br>
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);<br>
- X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width not full CU\n");<br>
-<br>
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, MAX_CU_SIZE, coeffCurY, trSize, scalingListType, true, lastPosTransformSkip[TEXT_LUMA][0]);<br>
+<br>
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, strideResiY, coeffCurY, trSize, scalingListType, true, lastPosTransformSkip[TEXT_LUMA][0]);<br>
<br>
nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width,<br>
- m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx),<br>
- MAX_CU_SIZE);<br>
+ curResiY, strideResiY);<br>
<br>
singleCostY = m_rdCost->calcRdCost(nonZeroDistY, skipSingleBitsY);<br>
}<br>
@@ -3407,11 +3410,8 @@<br>
if (!absSumTransformSkipY || minCost[TEXT_LUMA][0] < singleCostY)<br>
{<br>
cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);<br>
- memcpy(coeffCurY, bestCoeffY, sizeof(coeff_t) * numSamplesLuma);<br>
- for (int i = 0; i < trSize; ++i)<br>
- {<br>
- memcpy(curResiY + i * MAX_CU_SIZE, &bestResiY[i * trSize], sizeof(int16_t) * trSize);<br>
- }<br>
+ memcpy(coeffCurY, bestCoeffY, sizeof(coeff_t) * numCoeffY);<br>
+ primitives.square_copy_ss[sizeIdx](curResiY, strideResiY, bestResiY, trSize);<br>
}<br>
else<br>
{<br>
@@ -3435,7 +3435,6 @@<br>
initSection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);<br>
<br>
int partSizeC = partitionFromSize(trSizeC);<br>
- const uint32_t numSamplesChroma = trSizeC * trSizeC;<br>
<br>
do<br>
{<br>
@@ -3444,18 +3443,14 @@<br>
<br>
int16_t *curResiU = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);<br>
int16_t *curResiV = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);<br>
- uint32_t stride = m_qtTempShortYuv[qtlayer].m_cwidth;<br>
-<br>
- coeff_t bestCoeffU[32 * 32], bestCoeffV[32 * 32];<br>
- memcpy(bestCoeffU, coeffCurU + subTUBufferOffset, sizeof(coeff_t) * numSamplesChroma);<br>
- memcpy(bestCoeffV, coeffCurV + subTUBufferOffset, sizeof(coeff_t) * numSamplesChroma);<br>
-<br>
- int16_t bestResiU[32 * 32], bestResiV[32 * 32];<br>
- for (int i = 0; i < trSizeC; ++i)<br>
- {<br>
- memcpy(&bestResiU[i * trSizeC], curResiU + i * stride, sizeof(int16_t) * trSizeC);<br>
- memcpy(&bestResiV[i * trSizeC], curResiV + i * stride, sizeof(int16_t) * trSizeC);<br>
- }<br>
+<br>
+ coeff_t bestCoeffU[MAX_TS_SIZE * MAX_TS_SIZE], bestCoeffV[MAX_TS_SIZE * MAX_TS_SIZE];<br>
+ memcpy(bestCoeffU, coeffCurU + subTUBufferOffset, sizeof(coeff_t) * numCoeffC);<br>
+ memcpy(bestCoeffV, coeffCurV + subTUBufferOffset, sizeof(coeff_t) * numCoeffC);<br>
+<br>
+ int16_t bestResiU[MAX_TS_SIZE * MAX_TS_SIZE], bestResiV[MAX_TS_SIZE * MAX_TS_SIZE];<br>
+ primitives.square_copy_ss[sizeIdxC](bestResiU, trSizeC, curResiU, strideResiC);<br>
+ primitives.square_copy_ss[sizeIdxC](bestResiV, trSizeC, curResiV, strideResiC);<br>
<br>
cu->setTransformSkipPartRange(1, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
cu->setTransformSkipPartRange(1, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
@@ -3493,11 +3488,10 @@<br>
<br>
int scalingListType = 3 + TEXT_CHROMA_U;<br>
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);<br>
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurU + subTUBufferOffset,<br>
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUBufferOffset,<br>
trSizeC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section]);<br>
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,<br>
- m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC),<br>
- m_qtTempShortYuv[qtlayer].m_cwidth);<br>
+ curResiU, strideResiC);<br>
nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);<br>
singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);<br>
}<br>
@@ -3506,11 +3500,8 @@<br>
{<br>
cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
<br>
- memcpy(coeffCurU + subTUBufferOffset, bestCoeffU, sizeof(coeff_t) * numSamplesChroma);<br>
- for (int i = 0; i < trSizeC; ++i)<br>
- {<br>
- memcpy(curResiU + i * stride, &bestResiU[i * trSizeC], sizeof(int16_t) * trSizeC);<br>
- }<br>
+ memcpy(coeffCurU + subTUBufferOffset, bestCoeffU, sizeof(coeff_t) * numCoeffC);<br>
+ primitives.square_copy_ss[sizeIdxC](curResiU, strideResiC, bestResiU, trSizeC);<br>
}<br>
else<br>
{<br>
@@ -3530,11 +3521,10 @@<br>
<br>
int scalingListType = 3 + TEXT_CHROMA_V;<br>
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);<br>
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurV + subTUBufferOffset,<br>
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUBufferOffset,<br>
trSizeC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section]);<br>
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,<br>
- m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC),<br>
- m_qtTempShortYuv[qtlayer].m_cwidth);<br>
+ curResiV, strideResiC);<br>
nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);<br>
singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);<br>
}<br>
@@ -3543,11 +3533,8 @@<br>
{<br>
cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
<br>
- memcpy(coeffCurV + subTUBufferOffset, bestCoeffV, sizeof(coeff_t) * numSamplesChroma);<br>
- for (int i = 0; i < trSizeC; ++i)<br>
- {<br>
- memcpy(curResiV + i * stride, &bestResiV[i * trSizeC], sizeof(int16_t) * trSizeC);<br>
- }<br>
+ memcpy(coeffCurV + subTUBufferOffset, bestCoeffV, sizeof(coeff_t) * numCoeffC);<br>
+ primitives.square_copy_ss[sizeIdxC](curResiV, strideResiC, bestResiV, trSizeC);<br>
}<br>
else<br>
{<br>
@@ -3560,6 +3547,7 @@<br>
cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);<br>
}<br>
while (isNextSection(&tuIterator));<br>
+<br>
}<br>
<br>
m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);<br>
@@ -3929,7 +3917,7 @@<br>
<br>
if (bCodeChroma)<br>
{<br>
- m_qtTempShortYuv[qtlayer].copyPartToPartChroma(resiYuv, absPartIdx, 1 << trSizeLog2, (bChromaSame && (chFmt != CHROMA_422)));<br>
+ m_qtTempShortYuv[qtlayer].copyPartToPartChroma(resiYuv, absPartIdx, trSize, (bChromaSame && (chFmt != CHROMA_422)));<br>
}<br>
}<br>
else<br>
diff -r a5998df9b12e -r 73f86312c2e0 source/common/primitives.cpp<br>
--- a/source/common/primitives.cpp Mon Jun 02 07:36:20 2014 +0530<br>
+++ b/source/common/primitives.cpp Mon Jun 02 11:44:59 2014 +0900<br>
@@ -55,6 +55,11 @@<br>
LUMA_4x4, LUMA_8x8, 255, LUMA_16x16, 255, 255, 255, LUMA_32x32, 255, 255, 255, 255, 255, 255, 255, LUMA_64x64<br>
};<br>
<br>
+extern const uint8_t lumaPartitionsFromSquareBlocksTable[] =<br>
+{<br>
+ LUMA_4x4, LUMA_8x8, LUMA_16x16, LUMA_32x32, LUMA_64x64<br>
+};<br>
+<br>
/* the "authoritative" set of encoder primitives */<br>
EncoderPrimitives primitives;<br>
<br>
@@ -72,6 +77,31 @@<br>
Setup_C_IPredPrimitives(p); // intrapred.cpp<br>
Setup_C_LoopFilterPrimitives(p); // loopfilter.cpp<br>
}<br>
+<br>
+static void Setup_Alias_Primitives(EncoderPrimitives &p)<br>
+{<br>
+ /* copy reusable luma primitives to chroma 4:4:4 */<br>
+ for (int i = 0; i < NUM_LUMA_PARTITIONS; i++)<br>
+ {<br>
+ p.chroma[X265_CSP_I444].copy_pp[i] = p.luma_copy_pp[i];<br>
+ p.chroma[X265_CSP_I444].copy_ps[i] = p.luma_copy_ps[i];<br>
+ p.chroma[X265_CSP_I444].copy_sp[i] = p.luma_copy_sp[i];<br>
+ p.chroma[X265_CSP_I444].copy_ss[i] = p.luma_copy_ss[i];<br>
+ p.chroma[X265_CSP_I444].add_ps[i] = p.luma_add_ps[i];<br>
+ p.chroma[X265_CSP_I444].sub_ps[i] = p.luma_sub_ps[i];<br>
+ p.chroma[X265_CSP_I444].addAvg[i] = p.luma_addAvg[i];<br>
+ }<br>
+<br>
+ for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)<br>
+ {<br>
+ int partL = lumaPartitionsFromSquareBlocksTable[i];<br>
+ p.sad_square[i] = p.sad[partL];<br>
+ p.square_copy_pp[i] = p.luma_copy_pp[partL];<br>
+ p.square_copy_ps[i] = p.luma_copy_ps[partL];<br>
+ p.square_copy_sp[i] = p.luma_copy_sp[partL];<br>
+ p.square_copy_ss[i] = p.luma_copy_ss[partL];<br>
+ }<br>
+}<br>
}<br>
using namespace x265;<br>
<br>
@@ -95,6 +125,8 @@<br>
x265_log(param, X265_LOG_WARNING, "Assembly not supported in this binary\n");<br>
#endif<br>
<br>
+ Setup_Alias_Primitives(primitives);<br>
+<br>
initROM();<br>
}<br>
<br>
diff -r a5998df9b12e -r 73f86312c2e0 source/common/primitives.h<br>
--- a/source/common/primitives.h Mon Jun 02 07:36:20 2014 +0530<br>
+++ b/source/common/primitives.h Mon Jun 02 11:44:59 2014 +0900<br>
@@ -213,6 +213,10 @@<br>
copy_ss_t luma_copy_ss[NUM_LUMA_PARTITIONS];<br>
pixel_sub_ps_t luma_sub_ps[NUM_LUMA_PARTITIONS];<br>
pixel_add_ps_t luma_add_ps[NUM_LUMA_PARTITIONS];<br>
+ copy_pp_t square_copy_pp[NUM_SQUARE_BLOCKS];<br>
+ copy_sp_t square_copy_sp[NUM_SQUARE_BLOCKS];<br>
+ copy_ps_t square_copy_ps[NUM_SQUARE_BLOCKS];<br>
+ copy_ss_t square_copy_ss[NUM_SQUARE_BLOCKS];<br>
<br>
filter_pp_t luma_hpp[NUM_LUMA_PARTITIONS];<br>
filter_hps_t luma_hps[NUM_LUMA_PARTITIONS];<br>
diff -r a5998df9b12e -r 73f86312c2e0 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp Mon Jun 02 07:36:20 2014 +0530<br>
+++ b/source/common/x86/asm-primitives.cpp Mon Jun 02 11:44:59 2014 +0900<br>
@@ -1316,30 +1316,12 @@<br>
}<br>
#endif // if HIGH_BIT_DEPTH<br>
<br>
- /* copy reusable luma primitives to chroma 4:4:4 */<br>
- for (int i = 0; i < NUM_LUMA_PARTITIONS; i++)<br>
- {<br>
- p.chroma[X265_CSP_I444].copy_pp[i] = p.luma_copy_pp[i];<br>
- p.chroma[X265_CSP_I444].copy_ps[i] = p.luma_copy_ps[i];<br>
- p.chroma[X265_CSP_I444].copy_sp[i] = p.luma_copy_sp[i];<br>
- p.chroma[X265_CSP_I444].copy_ss[i] = p.luma_copy_ss[i];<br>
- p.chroma[X265_CSP_I444].add_ps[i] = p.luma_add_ps[i];<br>
- p.chroma[X265_CSP_I444].sub_ps[i] = p.luma_sub_ps[i];<br>
- p.chroma[X265_CSP_I444].addAvg[i] = p.luma_addAvg[i];<br>
- }<br>
-<br>
primitives.sa8d[BLOCK_4x4] = primitives.sa8d_inter[LUMA_4x4];<br>
primitives.sa8d[BLOCK_8x8] = primitives.sa8d_inter[LUMA_8x8];<br>
primitives.sa8d[BLOCK_16x16] = primitives.sa8d_inter[LUMA_16x16];<br>
primitives.sa8d[BLOCK_32x32] = primitives.sa8d_inter[LUMA_32x32];<br>
primitives.sa8d[BLOCK_64x64] = primitives.sa8d_inter[LUMA_64x64];<br>
<br>
- primitives.sad_square[BLOCK_4x4] = primitives.sad[LUMA_4x4];<br>
- primitives.sad_square[BLOCK_8x8] = primitives.sad[LUMA_8x8];<br>
- primitives.sad_square[BLOCK_16x16] = primitives.sad[LUMA_16x16];<br>
- primitives.sad_square[BLOCK_32x32] = primitives.sad[LUMA_32x32];<br>
- primitives.sad_square[BLOCK_64x64] = primitives.sad[LUMA_64x64];<br>
-<br>
// SA8D devolves to SATD for blocks not even multiples of 8x8<br>
primitives.sa8d_inter[LUMA_4x4] = primitives.satd[LUMA_4x4];<br>
primitives.sa8d_inter[LUMA_4x8] = primitives.satd[LUMA_4x8];<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div>