[x265] [PATCH] rdoQuant optimization, downscaling dstCoeff from int32_t* to int16_t*
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Sep 2 16:10:47 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1408964209 -19800
# Node ID fdb2a3fa112229319d63b43fb1946b9b90782923
# Parent 5b06f046b25ffa84d5ddafff3f0e9f618cd38625
rdoQuant optimization, downscaling dstCoeff from int32_t* to int16_t*
diff -r 5b06f046b25f -r fdb2a3fa1122 source/common/quant.cpp
--- a/source/common/quant.cpp Tue Sep 02 12:05:17 2014 +0530
+++ b/source/common/quant.cpp Mon Aug 25 16:26:49 2014 +0530
@@ -367,7 +367,18 @@
}
if (m_useRDOQ)
- return rdoQuant(cu, coeff, log2TrSize, ttype, absPartIdx, usePsy);
+ {
+ /* This section of code safely converts the int32_t coefficients to int16_t. Once the caller function is
+ * optimized to take coefficients as int16_t*, this conversion will be removed. */
+ int numCoeff = 1 << log2TrSize * 2;
+ assert(numCoeff <= 1024);
+ ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
+ for (int i = 0; i < numCoeff; i++)
+ {
+ qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
+ }
+ return rdoQuant(cu, qCoeff, log2TrSize, ttype, absPartIdx, usePsy);
+ }
else
{
int deltaU[32 * 32];
@@ -472,7 +483,7 @@
/* Rate distortion optimized quantization for entropy coding engines using
* probability models like CABAC */
-uint32_t Quant::rdoQuant(TComDataCU* cu, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy)
+uint32_t Quant::rdoQuant(TComDataCU* cu, int16_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy)
{
int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
int scalingListType = (cu->isIntra(absPartIdx) ? 0 : 3) + ttype;
@@ -487,26 +498,9 @@
int numCoeff = 1 << log2TrSize * 2;
- assert(numCoeff <= 1024);
- ALIGN_VAR_16(int16_t, qCoeff1[1024]);
- for (int i = 0; i < numCoeff; i++)
- {
- qCoeff1[i] = (int16_t)Clip3(-32768, 32767, dstCoeff[i]);
- }
- uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, qCoeff1, qbits, add, numCoeff);
- for (int i = 0; i < numCoeff; i++)
- {
- dstCoeff[i] = qCoeff1[i];
- }
+ uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, qbits, add, numCoeff);
- assert(numCoeff <= 1024);
- ALIGN_VAR_16(int16_t, qCoeff[1024]);
- for (int i = 0; i < numCoeff; i++)
- {
- qCoeff[i] = (int16_t)Clip3(-32768, 32767, dstCoeff[i]);
- }
-
- X265_CHECK((int)numSig == primitives.count_nonzero(qCoeff, 1 << log2TrSize * 2), "numSig differ\n");
+ X265_CHECK((int)numSig == primitives.count_nonzero(dstCoeff, 1 << log2TrSize * 2), "numSig differ\n");
if (!numSig)
return 0;
@@ -583,7 +577,7 @@
{
scanPos = (cgScanPos << MLS_CG_SIZE) + scanPosinCG;
uint32_t blkPos = codeParams.scan[scanPos];
- uint32_t maxAbsLevel = abs(dstCoeff[blkPos]); /* abs(quantized coeff) */
+ uint16_t maxAbsLevel = (int16_t)abs(dstCoeff[blkPos]); /* abs(quantized coeff) */
int signCoef = m_resiDctCoeff[blkPos]; /* pre-quantization DCT coeff */
int predictedCoef = m_fencDctCoeff[blkPos] - signCoef; /* predicted DCT = source DCT - residual DCT*/
@@ -633,7 +627,7 @@
const int *greaterOneBits = estBitsSbac.greaterOneBits[oneCtx];
const int *levelAbsBits = estBitsSbac.levelAbsBits[absCtx];
- uint32_t level = 0;
+ uint16_t level = 0;
uint32_t sigCoefBits = 0;
costCoeff[scanPos] = MAX_INT64;
@@ -653,8 +647,8 @@
}
if (maxAbsLevel)
{
- uint32_t minAbsLevel = X265_MAX(maxAbsLevel - 1, 1);
- for (uint32_t lvl = maxAbsLevel; lvl >= minAbsLevel; lvl--)
+ uint16_t minAbsLevel = X265_MAX(maxAbsLevel - 1, 1);
+ for (uint16_t lvl = maxAbsLevel; lvl >= minAbsLevel; lvl--)
{
uint32_t levelBits = getICRateCost(lvl, lvl - baseLevel, greaterOneBits, levelAbsBits, goRiceParam, c1c2Idx) + IEP_RATE;
@@ -892,7 +886,7 @@
numSig += (level != 0);
uint32_t mask = (int32_t)m_resiDctCoeff[blkPos] >> 31;
- dstCoeff[blkPos] = (level ^ mask) - mask;
+ dstCoeff[blkPos] = (int16_t)((level ^ mask) - mask);
}
/* clean uncoded coefficients */
@@ -939,7 +933,8 @@
* finalChange imply absolute levels (+1 is away from zero, -1 is towards zero) */
int64_t minCostInc = MAX_INT64, curCost = MAX_INT64;
- int minPos = -1, finalChange = 0, curChange = 0;
+ int minPos = -1;
+ int16_t finalChange = 0, curChange = 0;
for (n = (lastCG ? lastNZPosInCG : SCAN_SET_SIZE - 1); n >= 0; --n)
{
diff -r 5b06f046b25f -r fdb2a3fa1122 source/common/quant.h
--- a/source/common/quant.h Tue Sep 02 12:05:17 2014 +0530
+++ b/source/common/quant.h Mon Aug 25 16:26:49 2014 +0530
@@ -108,7 +108,7 @@
uint32_t signBitHidingHDQ(coeff_t* qcoeff, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codingParameters);
- uint32_t rdoQuant(TComDataCU* cu, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy);
+ uint32_t rdoQuant(TComDataCU* cu, int16_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy);
inline uint32_t getRateLast(uint32_t posx, uint32_t posy) const;
};
More information about the x265-devel
mailing list