[x265-commits] [x265] TEncSearch: remove unnecessary pragma
Steve Borho
steve at borho.org
Tue Jun 3 05:41:47 CEST 2014
details: http://hg.videolan.org/x265/rev/bc38a1637220
branches:
changeset: 6956:bc38a1637220
user: Steve Borho <steve at borho.org>
date: Mon Jun 02 21:00:52 2014 -0500
description:
TEncSearch: remove unnecessary pragma
Subject: [x265] refine cbf==0 path: remove clearing coeff and resi
details: http://hg.videolan.org/x265/rev/b46dd1095ed8
branches:
changeset: 6957:b46dd1095ed8
user: Satoshi Nakagawa <nakagawa424 at oki.com>
date: Mon Jun 02 11:44:59 2014 +0900
description:
refine cbf==0 path: remove clearing coeff and resi
Subject: [x265] primitives: move more aliasing to Setup_Alias_Primitives
details: http://hg.videolan.org/x265/rev/31f93f0d024f
branches:
changeset: 6958:31f93f0d024f
user: Steve Borho <steve at borho.org>
date: Mon Jun 02 22:22:10 2014 -0500
description:
primitives: move more aliasing to Setup_Alias_Primitives
Subject: [x265] TEncSearch: rename variable to avoid shadowing an earlier 'part'
details: http://hg.videolan.org/x265/rev/92ef2e02f653
branches:
changeset: 6959:92ef2e02f653
user: Steve Borho <steve at borho.org>
date: Mon Jun 02 22:27:36 2014 -0500
description:
TEncSearch: rename variable to avoid shadowing an earlier 'part'
diffstat:
source/Lib/TLibEncoder/TEncEntropy.cpp | 1 -
source/Lib/TLibEncoder/TEncEntropy.h | 1 -
source/Lib/TLibEncoder/TEncSbac.cpp | 5 +-
source/Lib/TLibEncoder/TEncSearch.cpp | 438 +++++++++++++++-----------------
source/common/primitives.cpp | 47 +++
source/common/primitives.h | 4 +
source/common/x86/asm-primitives.cpp | 33 --
7 files changed, 265 insertions(+), 264 deletions(-)
diffs (truncated from 1080 to 300 lines):
diff -r 5b6c9cda191b -r 92ef2e02f653 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp Mon Jun 02 14:21:04 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Mon Jun 02 22:27:36 2014 -0500
@@ -202,7 +202,6 @@ bool TEncEntropy::isNextTUSection(TComTU
void TEncEntropy::initTUEntropySection(TComTURecurse *tuIterator, uint32_t splitMode, uint32_t absPartIdxStep, uint32_t m_absPartIdxTU)
{
- tuIterator->m_partOffset = 0;
tuIterator->m_section = 0;
tuIterator->m_absPartIdxTURelCU = m_absPartIdxTU;
tuIterator->m_splitMode = splitMode;
diff -r 5b6c9cda191b -r 92ef2e02f653 source/Lib/TLibEncoder/TEncEntropy.h
--- a/source/Lib/TLibEncoder/TEncEntropy.h Mon Jun 02 14:21:04 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncEntropy.h Mon Jun 02 22:27:36 2014 -0500
@@ -66,7 +66,6 @@ struct TComTURecurse
uint32_t m_splitMode;
uint32_t m_absPartIdxTURelCU;
uint32_t m_absPartIdxStep;
- uint32_t m_partOffset;
};
// ====================================================================================================================
diff -r 5b6c9cda191b -r 92ef2e02f653 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp Mon Jun 02 14:21:04 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp Mon Jun 02 22:27:36 2014 -0500
@@ -2120,8 +2120,9 @@ void TEncSbac::codeCoeffNxN(TComDataCU*
// compute number of significant coefficients
uint32_t numSig = primitives.count_nonzero(coeff, trSize * trSize);
- if (numSig == 0)
- return;
+#if CHECKED_BUILD || _DEBUG
+ X265_CHECK(numSig > 0, "cbf check fail");
+#endif
bool beValid;
if (cu->getCUTransquantBypass(absPartIdx))
diff -r 5b6c9cda191b -r 92ef2e02f653 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 02 14:21:04 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 02 22:27:36 2014 -0500
@@ -408,8 +408,8 @@ void TEncSearch::xIntraCodingLumaBlk(TCo
coeff_t* coeff = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
-
X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
+ const uint32_t reconQtStride = MAX_CU_SIZE;
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
@@ -443,25 +443,29 @@ void TEncSearch::xIntraCodingLumaBlk(TCo
//--- set coded block flag ---
cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
- //--- inverse transform ---
if (absSum)
{
+ //--- inverse transform ---
int scalingListType = 0 + TEXT_LUMA;
- X265_CHECK(scalingListType < 6, "scalingListType is too large %d\n", scalingListType);
+ X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
+ X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
+ //===== reconstruction =====
+ primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
+ //===== update distortion =====
+ outDist += primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);
}
else
{
- int16_t* resiTmp = residual;
+#if CHECKED_BUILD || _DEBUG
memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
- primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);
+#endif
+ //===== reconstruction =====
+ primitives.luma_copy_ps[part](reconQt, reconQtStride, pred, stride);
+ primitives.luma_copy_pp[part](reconIPred, reconIPredStride, pred, stride);
+ //===== update distortion =====
+ outDist += primitives.sse_pp[part](pred, stride, fenc, stride);
}
-
- X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
- //===== reconstruction =====
- primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);
- //===== update distortion =====
- outDist += primitives.sse_sp[part](reconQt, MAX_CU_SIZE, fenc, stride);
}
void TEncSearch::xIntraCodingChromaBlk(TComDataCU* cu,
@@ -519,67 +523,67 @@ void TEncSearch::xIntraCodingChromaBlk(T
primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
//===== transform and quantization =====
+ //--- init rate estimation arrays for RDOQ ---
+ if (useTransformSkipChroma ? m_cfg->bEnableRDOQTS : m_cfg->bEnableRDOQ)
{
- //--- init rate estimation arrays for RDOQ ---
- if (useTransformSkipChroma ? m_cfg->bEnableRDOQTS : m_cfg->bEnableRDOQ)
- {
- m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, tuSize, ttype);
- }
- //--- transform and quantization ---
- uint32_t absSum = 0;
- int lastPos = -1;
-
- int curChromaQpOffset;
- if (ttype == TEXT_CHROMA_U)
- {
- curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
- }
- else
- {
- curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
- }
- m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
-
- m_trQuant->selectLambda(TEXT_CHROMA);
-
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipChroma);
-
- //--- set coded block flag ---
- cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdx, absPartIdxStep);
-
+ m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, tuSize, ttype);
+ }
+
+ //--- transform and quantization ---
+ uint32_t absSum = 0;
+ int lastPos = -1;
+
+ int curChromaQpOffset;
+ if (ttype == TEXT_CHROMA_U)
+ {
+ curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
+ }
+ else
+ {
+ curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
+ }
+ m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
+ m_trQuant->selectLambda(TEXT_CHROMA);
+
+ absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipChroma);
+
+ //--- set coded block flag ---
+ cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdx, absPartIdxStep);
+
+ uint32_t dist;
+ if (absSum)
+ {
//--- inverse transform ---
- if (absSum)
- {
- int scalingListType = 0 + ttype;
- X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
- }
- else
- {
- int16_t* resiTmp = residual;
- memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
- primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);
- }
+ int scalingListType = 0 + ttype;
+ X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
+ X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
+ //===== reconstruction =====
+ primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
+ //===== update distortion =====
+ dist = primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);
}
-
- X265_CHECK(((intptr_t)residual & (tuSize - 1)) == 0, "residual alignment check failure\n");
- X265_CHECK(tuSize <= 32, "tuSize invalud\n");
- //===== reconstruction =====
- primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
- //===== update distortion =====
- uint32_t dist = primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);
+ else
+ {
+#if CHECKED_BUILD || _DEBUG
+ memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
+#endif
+ //===== reconstruction =====
+ primitives.square_copy_ps[sizeIdx](reconQt, reconQtStride, pred, stride);
+ primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, pred, stride);
+ //===== update distortion =====
+ dist = primitives.sse_pp[part](pred, stride, fenc, stride);
+ }
+
+ X265_CHECK(ttype == TEXT_CHROMA_U || ttype == TEXT_CHROMA_V, "invalid ttype\n");
if (ttype == TEXT_CHROMA_U)
{
outDist += m_rdCost->scaleChromaDistCb(dist);
}
- else if (ttype == TEXT_CHROMA_V)
+ else
{
outDist += m_rdCost->scaleChromaDistCr(dist);
}
- else
- {
- outDist += dist;
- }
}
void TEncSearch::xRecurIntraCodingQT(TComDataCU* cu,
@@ -800,15 +804,15 @@ void TEncSearch::xRecurIntraCodingQT(TCo
cu->setTransformSkipSubParts(bestModeId, TEXT_LUMA, absPartIdx, fullDepth);
//--- set reconstruction for next intra prediction blocks ---
- uint32_t width = cu->getCUSize(0) >> trDepth;
- uint32_t height = cu->getCUSize(0) >> trDepth;
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
int16_t* src = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
+ const uint32_t srcstride = MAX_CU_SIZE;
pixel* dst = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
uint32_t dststride = cu->getPic()->getPicYuvRec()->getStride();
- primitives.blockcpy_ps(width, height, dst, dststride, src, MAX_CU_SIZE);
+ int sizeIdx = trSizeLog2 - 2;
+ primitives.square_copy_sp[sizeIdx](dst, dststride, src, srcstride);
}
outDistY += singleDistY;
@@ -882,25 +886,29 @@ void TEncSearch::residualTransformQuantI
//--- set coded block flag ---
cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
- //--- inverse transform ---
+ int part = partitionFromSize(tuSize);
+
if (absSum)
{
+ //--- inverse transform ---
int scalingListType = 0 + TEXT_LUMA;
X265_CHECK(scalingListType < 6, "scalingListType %d\n", scalingListType);
m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
+
+ // Generate Recon
+ primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);
+ primitives.luma_copy_pp[part](reconIPred, reconIPredStride, recon, stride);
}
else
{
- int16_t* resiTmp = residual;
+#if CHECKED_BUILD || _DEBUG
memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
- primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);
+#endif
+
+ // Generate Recon
+ primitives.luma_copy_pp[part](recon, stride, pred, stride);
+ primitives.luma_copy_pp[part](reconIPred, reconIPredStride, pred, stride);
}
-
- //Generate Recon
- X265_CHECK(tuSize <= 32, "tuSize is too large\n");
- int part = partitionFromSize(tuSize);
- primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);
- primitives.blockcpy_pp(tuSize, tuSize, reconIPred, reconIPredStride, recon, stride);
}
if (bCheckSplit && !bCheckFull)
@@ -996,8 +1004,10 @@ void TEncSearch::xLoadIntraResultQT(TCom
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zOrder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
int16_t* reconQt = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
- primitives.blockcpy_ps(trSize, trSize, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE);
X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
+ const uint32_t reconQtStride = MAX_CU_SIZE;
+ int sizeIdx = trSizeLog2 - 2;
+ primitives.square_copy_sp[sizeIdx](reconIPred, reconIPredStride, reconQt, reconQtStride);
}
void TEncSearch::xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)
@@ -1075,8 +1085,7 @@ void TEncSearch::xLoadIntraResultChromaQ
}
//===== copy transform coefficients =====
- uint32_t trSizeC = 1 << trSizeCLog2;
- uint32_t numCoeffC = 1 << trSizeCLog2 * 2;
+ uint32_t numCoeffC = 1 << (trSizeCLog2 * 2);
uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
coeff_t* coeffDst = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
@@ -1088,12 +1097,13 @@ void TEncSearch::xLoadIntraResultChromaQ
m_qtTempTransformSkipYuv.copyPartToPartChroma(&m_qtTempShortYuv[qtlayer], absPartIdx, lumaSize, chromaId, splitIntoSubTUs);
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
- uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
int16_t* reconQt = m_qtTempShortYuv[qtlayer].getChromaAddr(chromaId, absPartIdx);
- primitives.blockcpy_ps(trSizeC, trSizeC, reconIPred, reconIPredStride, reconQt, reconQtStride);
+ uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;
More information about the x265-commits mailing list