[x265] [PATCH] Separated the logic block of code as a standalone function for vectorization
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Jun 25 14:41:28 CEST 2013
# HG changeset patch
# User praveentiwari
# Date 1372164077 -19800
# Node ID 08a1bf9ba9bcd79e62059615f99aa86fee0d4f79
# Parent 4e7a3fc1c49854ab89de035b6bc9690627463d69
Separated the logic block of code as a standalone function for vectorization
diff -r 4e7a3fc1c498 -r 08a1bf9ba9bc source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Tue Jun 25 17:38:30 2013 +0530
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Tue Jun 25 18:11:17 2013 +0530
@@ -739,6 +739,43 @@
} // TU loop
}
+void xCalQuantCoef(Int * piCoef, // Quantizes iWidth * iHeight coefficients in one pass. piCoef: input coefficients, only read here
+ Int * piQuantCoeff, // per-position multipliers applied to the absolute value of each coefficient
+ Int * deltaU, // out: per-position remainder left after the level is shifted back up (see loop body)
+ Int * piQCoef, // out: signed level after being passed through Clip3(-32768, 32767, ...)
+ Int * piArlCCoef, // out: written only when m_bUseAdaptQpSelect is true, otherwise left untouched
+ UInt & uiAcSum, // in/out: each level is added to it before its sign is restored; not reset by this function
+ Int iQBitsC, // right-shift amount used for the piArlCCoef values
+ Int iQBits, // right-shift amount used for the main level
+ Int iAdd, // offset added to the scaled coefficient before shifting by iQBits
+ Int iWidth, // block width; only the product iWidth * iHeight is used
+ Int iHeight, // block height; only the product iWidth * iHeight is used
+ Bool m_bUseAdaptQpSelect) // plain parameter here despite the m_ prefix; enables the piArlCCoef output
+{
+ Int iAddC = 1 << (iQBitsC - 1); // half of (1 << iQBitsC), added before the iQBitsC shift; NOTE(review): assumes iQBitsC >= 1 - confirm with caller
+ Int qBits8 = iQBits - 8; // deltaU is shifted by 8 bits fewer than the level; NOTE(review): assumes iQBits >= 8 - confirm
+
+ for (Int n = 0; n < iWidth * iHeight; n++) // visits positions 0 .. iWidth * iHeight - 1 in order
+ {
+ Int iLevel;
+ Int iSign;
+ UInt uiBlockPos = n; // the same index is used for every input and output array
+ iLevel = piCoef[uiBlockPos];
+ iSign = (iLevel < 0 ? -1 : 1); // zero is treated as positive
+
+ Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos]; // product is formed in 64 bits
+ if (m_bUseAdaptQpSelect)
+ {
+ piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC) >> iQBitsC); // offset by iAddC, then shifted by iQBitsC
+ }
+ iLevel = (Int)((tmpLevel + iAdd) >> iQBits); // offset by iAdd, then shifted down to the level
+ deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel << iQBits)) >> qBits8); // NOTE(review): iLevel << iQBits is evaluated in Int before the 64-bit subtraction - confirm it cannot overflow
+ uiAcSum += iLevel; // accumulated before the sign is restored
+ iLevel *= iSign; // restore the sign of the input coefficient
+ piQCoef[uiBlockPos] = Clip3(-32768, 32767, iLevel); // bounds are the signed 16-bit limits; Clip3 presumably clamps to them - defined outside this patch
+ }
+}
+
Void TComTrQuant::xQuant(TComDataCU* pcCU,
Int* pSrc,
TCoeff* pDes,
@@ -818,28 +855,8 @@
Int iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
iAdd = (pcCU->getSlice()->getSliceType() == I_SLICE ? 171 : 85) << (iQBits - 9);
Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
- Int iAddC = 1 << (iQBitsC - 1);
- Int qBits8 = iQBits - 8;
- for (Int n = 0; n < iWidth * iHeight; n++)
- {
- Int iLevel;
- Int iSign;
- UInt uiBlockPos = n;
- iLevel = piCoef[uiBlockPos];
- iSign = (iLevel < 0 ? -1 : 1);
-
- Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
- if (m_bUseAdaptQpSelect)
- {
- piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC) >> iQBitsC);
- }
- iLevel = (Int)((tmpLevel + iAdd) >> iQBits);
- deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel << iQBits)) >> qBits8);
- uiAcSum += iLevel;
- iLevel *= iSign;
- piQCoef[uiBlockPos] = Clip3(-32768, 32767, iLevel);
- } // for n
+ xCalQuantCoef(piCoef, piQuantCoeff, deltaU, piQCoef, piArlCCoef, uiAcSum, iQBitsC, iQBits, iAdd, iWidth, iHeight, m_bUseAdaptQpSelect);
if (pcCU->getSlice()->getPPS()->getSignHideFlag())
{
@@ -976,7 +993,7 @@
assert(bitDepth == 8);
const UInt uiLog2BlockSize = g_aucConvertToBit[uiWidth];
- x265::primitives.dct[x265::DCT_4x4 + uiLog2BlockSize - ((uiWidth==4) && (uiMode != REG_DCT))](pcResidual, m_plTempCoeff, uiStride);
+ x265::primitives.dct[x265::DCT_4x4 + uiLog2BlockSize - ((uiWidth == 4) && (uiMode != REG_DCT))](pcResidual, m_plTempCoeff, uiStride);
assert(uiWidth == uiHeight);
}
@@ -1021,10 +1038,10 @@
else
{
// ChECK_ME: I assume we don't use HIGH_BIT_DEPTH here
- assert( bitDepth == 8 );
+ assert(bitDepth == 8);
const UInt uiLog2BlockSize = g_aucConvertToBit[uiWidth];
- x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((uiWidth==4) && (uiMode != REG_DCT))](m_plTempCoeff, rpcResidual, uiStride);
+ x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((uiWidth == 4) && (uiMode != REG_DCT))](m_plTempCoeff, rpcResidual, uiStride);
}
}
@@ -1093,11 +1110,11 @@
Void TComTrQuant::xIT(Int bitDepth, UInt uiMode, Int* plCoef, Short* pResidual, UInt uiStride, Int iWidth, Int iHeight)
{
// ChECK_ME: I assume we don't use HIGH_BIT_DEPTH here
- assert( bitDepth == 8 );
+ assert(bitDepth == 8);
//xITrMxN(bitDepth, coeff, block, iWidth, iHeight, uiMode);
const UInt uiLog2BlockSize = g_aucConvertToBit[iWidth];
- x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((iWidth==4) && (uiMode != REG_DCT))](plCoef, pResidual, uiStride);
+ x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((iWidth == 4) && (uiMode != REG_DCT))](plCoef, pResidual, uiStride);
}
/** Wrapper function between HM interface and core 4x4 transform skipping
@@ -1850,12 +1867,12 @@
* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
* \returns cost of given absolute transform level
*/
-__inline Double TComTrQuant::xGetICRateCost(UInt uiAbsLevel,
+__inline Double TComTrQuant::xGetICRateCost(UInt uiAbsLevel,
UShort ui16CtxNumOne,
UShort ui16CtxNumAbs,
UShort ui16AbsGoRice,
- UInt c1Idx,
- UInt c2Idx) const
+ UInt c1Idx,
+ UInt c2Idx) const
{
Double iRate = xGetIEPRate();
UInt baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
More information about the x265-devel mailing list