[x265-commits] [x265] quant: don't bother with extra temp variables
Steve Borho
steve at borho.org
Wed Jul 30 11:02:41 CEST 2014
details: http://hg.videolan.org/x265/rev/5210fca67553
branches:
changeset: 7633:5210fca67553
user: Steve Borho <steve at borho.org>
date: Wed Jul 30 00:21:31 2014 -0500
description:
quant: don't bother with extra temp variables
Subject: [x265] quant: return signal cost from getRateLast(), do not include lambda
details: http://hg.videolan.org/x265/rev/e0320502f9ea
branches:
changeset: 7634:e0320502f9ea
user: Steve Borho <steve at borho.org>
date: Wed Jul 30 00:21:57 2014 -0500
description:
quant: return signal cost from getRateLast(), do not include lambda
Subject: [x265] quant: pass curCostSig to getCodedLevel as an integer
details: http://hg.videolan.org/x265/rev/4cb71a283ae3
branches:
changeset: 7635:4cb71a283ae3
user: Steve Borho <steve at borho.org>
date: Wed Jul 30 00:40:26 2014 -0500
description:
quant: pass curCostSig to getCodedLevel as an integer
Subject: [x265] quant: readability nit
details: http://hg.videolan.org/x265/rev/ddef8e2d88fd
branches:
changeset: 7636:ddef8e2d88fd
user: Steve Borho <steve at borho.org>
date: Wed Jul 30 00:42:52 2014 -0500
description:
quant: readability nit
Subject: [x265] quant: rename absLevel to level, remove diffLevel
details: http://hg.videolan.org/x265/rev/09ae268bb0ce
branches:
changeset: 7637:09ae268bb0ce
user: Steve Borho <steve at borho.org>
date: Wed Jul 30 00:44:03 2014 -0500
description:
quant: rename absLevel to level, remove diffLevel
Subject: [x265] quant: minor cleanups
details: http://hg.videolan.org/x265/rev/df8314a1d3cb
branches:
changeset: 7638:df8314a1d3cb
user: Steve Borho <steve at borho.org>
date: Wed Jul 30 02:46:54 2014 -0500
description:
quant: minor cleanups
Subject: [x265] psy-rdoq: implementation of psy-rdoq (highly experimental)
details: http://hg.videolan.org/x265/rev/06dcd7c5df6e
branches:
changeset: 7639:06dcd7c5df6e
user: Sumalatha Polureddy <sumalatha at multicorewareinc.com>
date: Fri Jul 25 15:28:47 2014 +0530
description:
psy-rdoq: implementation of psy-rdoq (highly experimental)
This initial version stores a temp variable in TComTrQuant to avoid adding
even more parameters to getCodedLevel(), and it ignores scaling lists in the
unquant operation. Currently, you may need large psy-rdoq scale values to have
any real effect. It needs lots of testing.
Subject: [x265] defs: remove DISTORTION_PRECISION_ADJUSTMENT, fix bug in 2e22ea6ec4bc
details: http://hg.videolan.org/x265/rev/38349967645f
branches:
changeset: 7640:38349967645f
user: Steve Borho <steve at borho.org>
date: Wed Jul 30 03:17:30 2014 -0500
description:
defs: remove DISTORTION_PRECISION_ADJUSTMENT, fix bug in 2e22ea6ec4bc
FULL_NBIT was disabling this macro even for 16bpp builds, but I accidentally
enabled it. Since it was previously disabled for every build, and is only
present in SAO and quant, it is best to just remove it completely.
Subject: [x265] param: disable range checks for psy-rdoq while we tune it
details: http://hg.videolan.org/x265/rev/3d814fd1268b
branches:
changeset: 7641:3d814fd1268b
user: Steve Borho <steve at borho.org>
date: Wed Jul 30 03:44:24 2014 -0500
description:
param: disable range checks for psy-rdoq while we tune it
diffstat:
source/Lib/TLibCommon/CommonDef.h | 6 -
source/Lib/TLibCommon/TComTrQuant.cpp | 101 +++++++++++++------
source/Lib/TLibCommon/TComTrQuant.h | 17 ++-
source/Lib/TLibEncoder/TEncSampleAdaptiveOffset.cpp | 19 +--
source/Lib/TLibEncoder/TEncSearch.cpp | 34 +++---
source/Lib/TLibEncoder/TEncSearch.h | 2 +-
source/common/param.cpp | 2 +-
source/common/scalinglist.cpp | 3 +-
source/encoder/analysis.cpp | 2 +-
source/encoder/encoder.cpp | 10 +-
10 files changed, 114 insertions(+), 82 deletions(-)
diffs (truncated from 618 to 300 lines):
diff -r 05132ebe8413 -r 3d814fd1268b source/Lib/TLibCommon/CommonDef.h
--- a/source/Lib/TLibCommon/CommonDef.h Tue Jul 29 18:56:48 2014 -0700
+++ b/source/Lib/TLibCommon/CommonDef.h Wed Jul 30 03:44:24 2014 -0500
@@ -86,12 +86,6 @@
#define MDCS_ANGLE_LIMIT 4 // distance from true angle that horiz or vertical scan is allowed
#define MDCS_LOG2_MAX_SIZE 3 // TUs with log2 of size greater than this can only use diagonal scan
-#if HIGH_BIT_DEPTH
-# define DISTORTION_PRECISION_ADJUSTMENT(x) (x)
-#else
-# define DISTORTION_PRECISION_ADJUSTMENT(x) 0
-#endif
-
#define MAX_NUM_REF_PICS 16 // max. number of pictures used for reference
#define MAX_NUM_REF 16 // max. number of entries in picture reference list
diff -r 05132ebe8413 -r 3d814fd1268b source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Tue Jul 29 18:56:48 2014 -0700
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Wed Jul 30 03:44:24 2014 -0500
@@ -39,6 +39,8 @@
using namespace x265;
+#define SIGN(x,y) ((x^(y >> 31))-(y >> 31))
+
namespace {
struct coeffGroupRDStats
@@ -174,19 +176,26 @@ inline uint32_t getICRateCost(uint32_t a
TComTrQuant::TComTrQuant()
{
m_resiDctCoeff = NULL;
+ m_fencDctCoeff = NULL;
+ m_fencShortBuf = NULL;
}
-bool TComTrQuant::init(bool useRDOQ, const ScalingList& scalingList)
+bool TComTrQuant::init(bool useRDOQ, double psyScale, const ScalingList& scalingList)
{
m_useRDOQ = useRDOQ;
+ m_psyRdoqScale = (uint64_t)(psyScale * 256.0);
m_scalingList = &scalingList;
- m_resiDctCoeff = X265_MALLOC(coeff_t, MAX_CU_SIZE * MAX_CU_SIZE);
- return m_resiDctCoeff;
+ m_resiDctCoeff = X265_MALLOC(coeff_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
+ m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
+ m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
+
+ return m_resiDctCoeff && m_fencShortBuf;
}
TComTrQuant::~TComTrQuant()
{
X265_FREE(m_resiDctCoeff);
+ X265_FREE(m_fencShortBuf);
}
void TComTrQuant::setQPforQuant(TComDataCU* cu)
@@ -350,6 +359,8 @@ uint32_t TComTrQuant::quant(TComDataCU*
}
uint32_t TComTrQuant::transformNxN(TComDataCU* cu,
+ pixel* fenc,
+ uint32_t fencStride,
int16_t* residual,
uint32_t stride,
coeff_t* coeff,
@@ -394,12 +405,20 @@ uint32_t TComTrQuant::transformNxN(TComD
}
else
{
- // TODO: this may need larger data types for X265_DEPTH > 10
const uint32_t sizeIdx = log2TrSize - 2;
- int useDST = (sizeIdx == 0 && ttype == TEXT_LUMA && cu->getPredictionMode(absPartIdx) == MODE_INTRA);
+ int useDST = !sizeIdx && ttype == TEXT_LUMA && cu->getPredictionMode(absPartIdx) == MODE_INTRA;
int index = DCT_4x4 + sizeIdx - useDST;
+ if (m_psyRdoqScale && ttype == TEXT_LUMA)
+ {
+ // converting pixel to short for input to dct and psy-rdoq eval
+ // TODO: can this be re-used? should it be performed by caller?
+ primitives.square_copy_ps[sizeIdx](m_fencShortBuf, trSize, fenc, fencStride);
+ primitives.dct[index](m_fencShortBuf, m_fencDctCoeff, trSize);
+ }
+
primitives.dct[index](residual, m_resiDctCoeff, stride);
- if (m_nr->bNoiseReduction && index)
+
+ if (m_nr->bNoiseReduction && !useDST)
{
denoiseDct(m_resiDctCoeff, m_nr->residualSum[sizeIdx], m_nr->offset[sizeIdx], (16 << sizeIdx * 2));
m_nr->count[sizeIdx]++;
@@ -490,6 +509,7 @@ uint32_t TComTrQuant::rdoQuant(TComDataC
uint32_t trSize = 1 << log2TrSize;
int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // Represents scaling through forward transform
int scalingListType = (cu->isIntra(absPartIdx) ? 0 : 3) + ttype;
+ m_transformShift = transformShift;
X265_CHECK(scalingListType < 6, "scaling list type out of range\n");
@@ -511,6 +531,7 @@ uint32_t TComTrQuant::rdoQuant(TComDataC
selectLambda(ttype);
double *errScale = m_scalingList->m_errScale[log2TrSize - 2][scalingListType][rem];
+ bool usePsy = m_psyRdoqScale && ttype == TEXT_LUMA;
double blockUncodedCost = 0;
double costCoeff[32 * 32];
@@ -524,8 +545,8 @@ uint32_t TComTrQuant::rdoQuant(TComDataC
TUEntropyCodingParameters codingParameters;
getTUEntropyCodingParameters(cu, codingParameters, absPartIdx, log2TrSize, ttype);
- const uint32_t cgSize = (1 << MLS_CG_SIZE); // 16
- double costCoeffGroupSig[MLS_GRP_NUM];
+ const uint32_t cgSize = (1 << MLS_CG_SIZE); // 4x4 coef = 16
+ double costCoeffGroupSig[MLS_GRP_NUM]; // 32x32 has 64 4x4 coding groups
uint64_t sigCoeffGroupFlag64 = 0;
uint32_t ctxSet = 0;
int c1 = 1;
@@ -599,7 +620,7 @@ uint32_t TComTrQuant::rdoQuant(TComDataC
level = getCodedLevel(costCoeff[scanPos], 0, costSig[scanPos],
levelDouble, maxAbsLevel, baseLevel,
greaterOneBits, levelAbsBits, goRiceParam,
- c1c2Idx, qbits, scaleFactor);
+ c1c2Idx, qbits, scaleFactor, blkPos, usePsy);
sigRateDelta[blkPos] = 0;
}
else
@@ -613,10 +634,10 @@ uint32_t TComTrQuant::rdoQuant(TComDataC
}
if (maxAbsLevel)
{
- level = getCodedLevel(costCoeff[scanPos], m_lambda * m_estBitsSbac.significantBits[ctxSig][1], costSig[scanPos],
+ level = getCodedLevel(costCoeff[scanPos], m_estBitsSbac.significantBits[ctxSig][1], costSig[scanPos],
levelDouble, maxAbsLevel, baseLevel,
greaterOneBits, levelAbsBits, goRiceParam,
- c1c2Idx, qbits, scaleFactor);
+ c1c2Idx, qbits, scaleFactor, blkPos, usePsy);
}
else
level = 0;
@@ -799,7 +820,7 @@ uint32_t TComTrQuant::rdoQuant(TComDataC
{
uint32_t posY = blkPos >> log2TrSize;
uint32_t posX = blkPos - (posY << log2TrSize);
- double costLast = codingParameters.scanType == SCAN_VER ? getRateLast(posY, posX) : getRateLast(posX, posY);
+ double costLast = m_lambda * (codingParameters.scanType == SCAN_VER ? getRateLast(posY, posX) : getRateLast(posX, posY));
double totalCost = baseCost + costLast - costSig[scanPos];
if (totalCost < bestCost)
@@ -840,9 +861,8 @@ uint32_t TComTrQuant::rdoQuant(TComDataC
if (cu->m_slice->m_pps->bSignHideEnabled && numSig >= 2)
{
// Note:: the scaling list is being ignored in this optimization
- int prec = DISTORTION_PRECISION_ADJUSTMENT(2 * (X265_DEPTH - 8));
int64_t invQuant = ScalingList::s_invQuantScales[rem] << per;
- int64_t rdFactor = (int64_t)((invQuant * invQuant) / (m_lambda * (16 << prec)) + 0.5);
+ int64_t rdFactor = (int64_t)((invQuant * invQuant) / (m_lambda * 16) + 0.5);
int lastCG = 1;
for (int subSet = cgLastScanPos; subSet >= 0; subSet--)
@@ -1049,7 +1069,7 @@ uint32_t TComTrQuant::getSigCtxInc(const
* This method calculates the best quantized transform level for a given scan position.
*/
inline uint32_t TComTrQuant::getCodedLevel(double& codedCost,
- const double curCostSig,
+ uint32_t curCostSig,
double& codedCostSig,
int levelDouble,
uint32_t maxAbsLevel,
@@ -1059,15 +1079,18 @@ inline uint32_t TComTrQuant::getCodedLev
uint32_t absGoRice,
uint32_t c1c2Idx,
int qbits,
- double scaleFactor) const
+ double scaleFactor,
+ int blkPos,
+ bool usePsy) const
{
+ X265_CHECK(abs((double)levelDouble - (maxAbsLevel << qbits)) < INT_MAX, "levelDouble range check failure\n");
+
uint32_t bestAbsLevel = 0;
int32_t minAbsLevel = maxAbsLevel - 1;
if (minAbsLevel < 1)
minAbsLevel = 1;
// NOTE: (A + B) ^ 2 = (A ^ 2) + 2 * A * B + (B ^ 2)
- X265_CHECK(abs((double)levelDouble - (maxAbsLevel << qbits)) < INT_MAX, "levelDouble range check failure\n");
const int32_t err1 = levelDouble - (maxAbsLevel << qbits); // A
double err2 = (double)((int64_t)err1 * err1); // A ^ 2
const int64_t err3 = (int64_t)2 * err1 * ((int64_t)1 << qbits); // 2 * A * B
@@ -1076,27 +1099,39 @@ inline uint32_t TComTrQuant::getCodedLev
err2 *= scaleFactor;
- double bestCodedCost = codedCost;
- double bestCodedCostSig = codedCostSig;
- int diffLevel = maxAbsLevel - baseLevel;
- for (int absLevel = maxAbsLevel; absLevel >= minAbsLevel; absLevel--)
+ int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - m_transformShift;
+ int add = (1 << shift) - 1;
+ int scale = m_scalingList->s_invQuantScales[m_qpParam[0].rem] << m_qpParam[0].per;
+ int scaleBits = SCALE_BITS - 2 * m_transformShift;
+
+ for (int level = maxAbsLevel; level >= minAbsLevel; level--)
{
- X265_CHECK(fabs((double)err2 - double(levelDouble - (absLevel << qbits)) * double(levelDouble - (absLevel << qbits)) * scaleFactor) < 1e-5, "err2 check failure\n");
- double curCost = err2 + m_lambda * getICRateCost(absLevel, diffLevel, greaterOneBits, levelAbsBits, absGoRice, c1c2Idx);
- curCost += curCostSig;
+ X265_CHECK(fabs((double)err2 - double(levelDouble - (level << qbits)) * double(levelDouble - (level << qbits)) * scaleFactor) < 1e-5, "err2 check failure\n");
- if (curCost < bestCodedCost)
+ uint32_t rateCost = getICRateCost(level, level - baseLevel, greaterOneBits, levelAbsBits, absGoRice, c1c2Idx);
+ double curCost = err2 + m_lambda * (curCostSig + rateCost);
+
+ /* Psy RDOQ: bias in favor of higher AC coefficients in the reconstructed frame. */
+ if (usePsy && blkPos)
{
- bestAbsLevel = absLevel;
- bestCodedCost = curCost;
- bestCodedCostSig = curCostSig;
+ int signCoef = m_resiDctCoeff[blkPos];
+ int unquantAbsLevel = (level * scale + add) >> shift;
+ int predictedCoef = m_fencDctCoeff[blkPos] - signCoef;
+ int reconCoef = abs(unquantAbsLevel + SIGN(predictedCoef, signCoef)) << scaleBits;
+ int psyValue = (m_psyRdoqScale * reconCoef) >> 8;
+ curCost -= psyValue;
}
+
+ if (curCost < codedCost)
+ {
+ bestAbsLevel = level;
+ codedCost = curCost;
+ codedCostSig = m_lambda * curCostSig;
+ }
+
err2 += errInc;
- diffLevel--;
}
- codedCost = bestCodedCost;
- codedCostSig = bestCodedCostSig;
return bestAbsLevel;
}
@@ -1105,7 +1140,7 @@ inline uint32_t TComTrQuant::getCodedLev
* \param posy Y coordinate of the last significant coefficient
* \returns cost of last significant coefficient
*/
-inline double TComTrQuant::getRateLast(uint32_t posx, uint32_t posy) const
+inline uint32_t TComTrQuant::getRateLast(uint32_t posx, uint32_t posy) const
{
uint32_t ctxX = getGroupIdx(posx);
uint32_t ctxY = getGroupIdx(posy);
@@ -1116,7 +1151,7 @@ inline double TComTrQuant::getRateLast(u
cost += maskX & (IEP_RATE * ((ctxX - 2) >> 1));
cost += maskY & (IEP_RATE * ((ctxY - 2) >> 1));
- return m_lambda * cost;
+ return cost;
}
/** Context derivation process of coeff_abs_significant_flag
diff -r 05132ebe8413 -r 3d814fd1268b source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h Tue Jul 29 18:56:48 2014 -0700
+++ b/source/Lib/TLibCommon/TComTrQuant.h Wed Jul 30 03:44:24 2014 -0500
@@ -98,14 +98,14 @@ public:
~TComTrQuant();
/* one-time setup */
- bool init(bool useRDOQ, const ScalingList& scalingList);
+ bool init(bool useRDOQ, double scale, const ScalingList& scalingList);
/* CU setup */
void setQPforQuant(TComDataCU* cu);
void setLambdas(double lambdaY, double lambdaCb, double lambdaCr) { m_lambdas[0] = lambdaY; m_lambdas[1] = lambdaCb; m_lambdas[2] = lambdaCr; }
- uint32_t transformNxN(TComDataCU* cu, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t log2TrSize,
- TextType ttype, uint32_t absPartIdx, bool useTransformSkip, bool curUseRDOQ);
+ uint32_t transformNxN(TComDataCU* cu, pixel *fenc, uint32_t fencstride, int16_t* residual, uint32_t stride, coeff_t* coeff,
+ uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip, bool curUseRDOQ);
void invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t stride, coeff_t* coeff,
uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
@@ -119,7 +119,10 @@ public:
double m_lambdas[3];
bool m_useRDOQ;
+ uint64_t m_psyRdoqScale;
coeff_t* m_resiDctCoeff;
+ coeff_t* m_fencDctCoeff;
+ int16_t* m_fencShortBuf;
static const uint32_t IEP_RATE = 32768; // cost of an equal probable bit
@@ -135,11 +138,13 @@ protected:
uint32_t rdoQuant(TComDataCU* cu, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx);
- inline uint32_t getCodedLevel(double& codedCost, const double curCostSig, double& codedCostSig, int levelDouble,
+ inline uint32_t getCodedLevel(double& codedCost, uint32_t curCostSig, double& codedCostSig, int levelDouble,
More information about the x265-commits
mailing list