[x265] [PATCH 6 of 6] reduce operators on rdoQuant() compute sigHide cost
chen
chenm003 at 163.com
Thu Sep 10 15:46:53 CEST 2015
Yes, about 2.4% of total encode time on my Haswell 4770K with a 4K sequence.
At 2015-09-10 11:43:15,"Pradeep Ramachandran" <pradeep at multicorewareinc.com> wrote:
Nice! Do you see any tangible performance improvement from these fixes to rdoquant on desktop/server systems?
Pradeep Ramachandran, PhD
Solution Architect,
Multicoreware Inc.
Ph: +91 99627 82018
On Thu, Sep 10, 2015 at 4:03 AM, Min Chen <chenm003 at 163.com> wrote:
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1441837075 18000
# Node ID 4a64832b90b8927f409e9239856281b02a1a2475
# Parent 41afa68a2c9c9e591c2e95264e2edc987df45d62
reduce operators on rdoQuant() compute sigHide cost
---
source/common/quant.cpp | 50 ++++++++++++++++++++++++++--------------------
1 files changed, 28 insertions(+), 22 deletions(-)
diff -r 41afa68a2c9c -r 4a64832b90b8 source/common/quant.cpp
--- a/source/common/quant.cpp Wed Sep 09 17:17:25 2015 -0500
+++ b/source/common/quant.cpp Wed Sep 09 17:17:55 2015 -0500
@@ -555,7 +555,7 @@
template<uint32_t log2TrSize>
uint32_t Quant::rdoQuant(const CUData& cu, int16_t* dstCoeff, TextType ttype, uint32_t absPartIdx, bool usePsy)
{
- int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
+ const int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
int scalingListType = (cu.isIntra(absPartIdx) ? 0 : 3) + ttype;
const uint32_t usePsyMask = usePsy ? -1 : 0;
@@ -567,7 +567,7 @@
int add = (1 << (qbits - 1));
const int32_t* qCoef = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
- int numCoeff = 1 << (log2TrSize * 2);
+ const int numCoeff = 1 << (log2TrSize * 2);
uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, qbits, add, numCoeff);
X265_CHECK((int)numSig == primitives.cu[log2TrSize - 2].count_nonzero(dstCoeff), "numSig differ\n");
if (!numSig)
@@ -583,7 +583,7 @@
const int32_t* unquantScale = m_scalingList->m_dequantCoef[log2TrSize - 2][scalingListType][rem];
int unquantShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift + (m_scalingList->m_bEnabled ? 4 : 0);
int unquantRound = (unquantShift > per) ? 1 << (unquantShift - per - 1) : 0;
- int scaleBits = SCALE_BITS - 2 * transformShift;
+ const int scaleBits = SCALE_BITS - 2 * transformShift;
#define UNQUANT(lvl) (((lvl) * (unquantScale[blkPos] << per) + unquantRound) >> unquantShift)
#define SIGCOST(bits) ((lambda2 * (bits)) >> 8)
@@ -1046,7 +1046,7 @@
c2 += (uint32_t)(c2 - 2) >> 31;
c2Idx++;
}
- else if ((c1 < 3) && (c1 > 0) && level)
+ else if (((c1 == 1) | (c1 == 2)) && level)
c1++;
if (dstCoeff[blkPos])
@@ -1276,7 +1276,7 @@
int64_t minCostInc = MAX_INT64, curCost = MAX_INT64;
int minPos = -1;
- int16_t finalChange = 0, curChange = 0;
+ int8_t finalChange = 0, curChange = 0;
for (n = (lastCG ? lastNZPosInCG : SCAN_SET_SIZE - 1); n >= 0; --n)
{
@@ -1285,21 +1285,21 @@
int absLevel = abs(dstCoeff[blkPos]);
int d = abs(signCoef) - UNQUANT(absLevel);
- int64_t origDist = (((int64_t)d * d)) << scaleBits;
+ const int64_t origDist = (((int64_t)d * d));
-#define DELTARDCOST(d, deltabits) ((((int64_t)d * d) << scaleBits) - origDist + ((lambda2 * (int64_t)(deltabits)) >> 8))
+#define DELTARDCOST(d0, d, deltabits) ((((int64_t)d * d - d0) << scaleBits) + ((lambda2 * (int64_t)(deltabits)) >> 8))
if (dstCoeff[blkPos])
{
d = abs(signCoef) - UNQUANT(absLevel + 1);
- int64_t costUp = DELTARDCOST(d, rateIncUp[blkPos]);
+ int64_t costUp = DELTARDCOST(origDist, d, rateIncUp[blkPos]);
/* if decrementing would make the coeff 0, we can include the
* significant coeff flag cost savings */
d = abs(signCoef) - UNQUANT(absLevel - 1);
int isOne = (abs(dstCoeff[blkPos]) == 1);
int downBits = rateIncDown[blkPos] - (isOne ? (IEP_RATE + sigRateDelta[blkPos]) : 0);
- int64_t costDown = DELTARDCOST(d, downBits);
+ int64_t costDown = DELTARDCOST(origDist, d, downBits);
costDown -= (lastCG & (n == lastNZPosInCG) & isOne) * 4 * IEP_RATE;
curCost = ((n == firstNZPosInCG) & isOne) ? MAX_INT64 : costDown;
@@ -1308,8 +1308,7 @@
if (costUp < costDown)
curCost = costUp;
}
- //else if ((n < firstNZPosInCG) & (signbit != ((uint32_t)signCoef >> 31)))
- else if (n < firstNZPosInCG && signbit != (signCoef >= 0 ? 0 : 1U))
+ else if ((n < firstNZPosInCG) & (signbit != ((uint32_t)signCoef >> 31)))
{
/* don't try to make a new coded coeff before the first coeff if its
* sign would be different than the first coeff, the inferred sign would
@@ -1320,7 +1319,7 @@
{
/* evaluate changing an uncoded coeff 0 to a coded coeff +/-1 */
d = abs(signCoef) - UNQUANT(1);
- curCost = DELTARDCOST(d, rateIncUp[blkPos] + IEP_RATE + sigRateDelta[blkPos]);
+ curCost = DELTARDCOST(origDist, d, rateIncUp[blkPos] + IEP_RATE + sigRateDelta[blkPos]);
curChange = 1;
}
@@ -1332,19 +1331,26 @@
}
}
- if (dstCoeff[minPos] == 32767 || dstCoeff[minPos] == -32768)
+ // if (dstCoeff[minPos] == 32767 || dstCoeff[minPos] == -32768)
+ if (((uint16_t)dstCoeff[minPos] + 1 == (uint16_t)0x8000) | ((uint16_t)dstCoeff[minPos] == (uint16_t)0x8000))
/* don't allow sign hiding to violate the SPEC range */
finalChange = -1;
- if (dstCoeff[minPos] == 0)
- numSig++;
- else if (finalChange == -1 && abs(dstCoeff[minPos]) == 1)
- numSig--;
+ // NOTE: Reference code
+ //if (dstCoeff[minPos] == 0)
+ // numSig++;
+ //else if (finalChange == -1 && abs(dstCoeff[minPos]) == 1)
+ // numSig--;
+ numSig += (dstCoeff[minPos] == 0) - ((finalChange == -1) & (abs(dstCoeff[minPos]) == 1));
- if (m_resiDctCoeff[minPos] >= 0)
- dstCoeff[minPos] += finalChange;
- else
- dstCoeff[minPos] -= finalChange;
+
+ // NOTE: Reference code
+ //if (m_resiDctCoeff[minPos] >= 0)
+ // dstCoeff[minPos] += finalChange;
+ //else
+ // dstCoeff[minPos] -= finalChange;
+ const int16_t resiCoeffSign = ((int16_t)m_resiDctCoeff[minPos] >> 16);
+ dstCoeff[minPos] += (((int16_t)finalChange ^ resiCoeffSign) - resiCoeffSign);
}
}
_______________________________________________
x265-devel mailing list
x265-devel at videolan.org
https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150910/448ce30e/attachment-0001.html>
More information about the x265-devel
mailing list