[x265] [PATCH] psyrdoq: implementation of psyrdoq
Steve Borho
steve at borho.org
Tue Jul 22 17:54:06 CEST 2014
On 07/22, sumalatha at multicorewareinc.com wrote:
> # HG changeset patch
> # User Sumalatha Polureddy<sumalatha at multicorewareinc.com>
> # Date 1406032149 -19800
> # Node ID 37e03dcd2e4f0b5894880ff8c097bd6e11590459
> # Parent d303b4d860e9f06396a156726dd518d0f41fe796
> psyrdoq: implementation of psyrdoq
>
> diff -r d303b4d860e9 -r 37e03dcd2e4f source/Lib/TLibCommon/TComTrQuant.cpp
> --- a/source/Lib/TLibCommon/TComTrQuant.cpp Mon Jul 21 22:43:38 2014 -0500
> +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Tue Jul 22 17:59:09 2014 +0530
> @@ -64,6 +64,8 @@
> return y + ((x - y) & ((x - y) >> (sizeof(int) * CHAR_BIT - 1))); // min(x, y)
> }
>
> +#define SIGN(x,y) ((x^(y >> 31))-(y >> 31))
> +
> // ====================================================================================================================
> // TComTrQuant class member functions
> // ====================================================================================================================
> @@ -307,6 +309,8 @@
> }
>
> uint32_t TComTrQuant::transformNxN(TComDataCU* cu,
> + pixel* fenc,
> + uint32_t fencStride,
> int16_t* residual,
> uint32_t stride,
> coeff_t* coeff,
> @@ -316,10 +320,10 @@
> bool useTransformSkip,
> bool curUseRDOQ)
> {
> + int trSize = 1 << log2TrSize;
> if (cu->getCUTransquantBypass(absPartIdx))
> {
> uint32_t numSig = 0;
> - int trSize = 1 << log2TrSize;
> for (int k = 0; k < trSize; k++)
> {
> for (int j = 0; j < trSize; j++)
> @@ -339,6 +343,12 @@
> const uint32_t sizeIdx = log2TrSize - 2;
> int useDST = (sizeIdx == 0 && ttype == TEXT_LUMA && cu->getPredictionMode(absPartIdx) == MODE_INTRA);
> int index = DCT_4x4 + sizeIdx - useDST;
> + if (psyRdoqEnabled())
> + {
> + // converting pixel to int and putting in separate buffer to take dct
> + primitives.square_copy_ps[sizeIdx](m_tmpfencBuf, MAX_CU_SIZE, fenc, fencStride);
> + primitives.dct[index](m_tmpfencBuf, m_tmpfencCoeff, stride);
> + }
> primitives.dct[index](residual, m_tmpCoeff, stride);
> if (m_nr->bNoiseReduction)
> {
> @@ -356,7 +366,7 @@
>
> if (m_useRDOQ && curUseRDOQ)
> {
> - return xRateDistOptQuant(cu, m_tmpCoeff, coeff, log2TrSize, ttype, absPartIdx);
> + return xRateDistOptQuant(cu, m_tmpfencCoeff, m_tmpCoeff, coeff, log2TrSize, ttype, absPartIdx);
> }
> return xQuant(cu, m_tmpCoeff, coeff, log2TrSize, ttype, absPartIdx);
> }
> @@ -505,7 +515,7 @@
> * Rate distortion optimized quantization for entropy
> * coding engines using probability models like CABAC
> */
> -uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize,
> +uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* fencCoeff, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize,
> TextType ttype, uint32_t absPartIdx)
> {
> uint32_t trSize = 1 << log2TrSize;
> @@ -614,7 +624,7 @@
> {
> level = xGetCodedLevel(costCoeff[scanPos], curCostSig, costSig[scanPos],
> levelDouble, maxAbsLevel, baseLevel, greaterOneBits, levelAbsBits, goRiceParam,
> - c1c2Idx, qbits, scaleFactor, 1);
> + c1c2Idx, qbits, scaleFactor, 1, srcCoeff[blkPos], fencCoeff[blkPos]);
> sigRateDelta[blkPos] = 0;
> }
> else
> @@ -631,7 +641,7 @@
> curCostSig = xGetRateSigCoef(1, ctxSig);
> level = xGetCodedLevel(costCoeff[scanPos], curCostSig, costSig[scanPos],
> levelDouble, maxAbsLevel, baseLevel, greaterOneBits, levelAbsBits, goRiceParam,
> - c1c2Idx, qbits, scaleFactor, 0);
> + c1c2Idx, qbits, scaleFactor, 0, srcCoeff[blkPos], fencCoeff[blkPos]);
> }
> else
> {
> @@ -1126,7 +1136,9 @@
> uint32_t c1c2Idx,
> int qbits,
> double scaleFactor,
> - bool last) const
> + bool last,
> + int signCoef,
> + int origCoef) const
> {
> uint32_t bestAbsLevel = 0;
>
> @@ -1155,7 +1167,18 @@
> for (int absLevel = maxAbsLevel; absLevel >= minAbsLevel; absLevel--)
> {
> X265_CHECK(fabs((double)err2 - double(levelDouble - (absLevel << qbits)) * double(levelDouble - (absLevel << qbits)) * scaleFactor) < 1e-5, "err2 check failure\n");
> - double curCost = err2 + xGetICRateCost(absLevel, diffLevel, greaterOneBits, levelAbsBits, absGoRice, c1c2Idx);
> + double curCost = 0;
In x264, psy-trellis is not applied to the DC coeff nor to chroma; we
need to at least try skipping them here as well, to measure the effect.
> + if (psyRdoqEnabled())
> + {
> + int unquantAbsLevel = (maxAbsLevel << qbits);
> + int predictedCoef = origCoef - signCoef;
> + int psyValue = abs(unquantAbsLevel + SIGN(predictedCoef, signCoef)); // test with differnt cases to check whether SIGN is required
> + curCost = err2 + xGetICRateCost(absLevel, diffLevel, greaterOneBits, levelAbsBits, absGoRice, c1c2Idx) - (m_psyRdoqScale * psyValue * m_lambda);
> + }
> + else
> + {
> + curCost = err2 + xGetICRateCost(absLevel, diffLevel, greaterOneBits, levelAbsBits, absGoRice, c1c2Idx);
> + }
style: no braces around single-line expressions
> curCost += curCostSig;
>
> if (curCost < bestCodedCost)
> diff -r d303b4d860e9 -r 37e03dcd2e4f source/Lib/TLibCommon/TComTrQuant.h
> --- a/source/Lib/TLibCommon/TComTrQuant.h Mon Jul 21 22:43:38 2014 -0500
> +++ b/source/Lib/TLibCommon/TComTrQuant.h Tue Jul 22 17:59:09 2014 +0530
> @@ -120,8 +120,18 @@
> // initialize class
> void init(bool useRDOQ);
>
> + void setPsyRdoqScale(double scale)
> + {
> + m_psyRdoqScale = scale;
> + }
> +
> + inline bool psyRdoqEnabled() const
> + {
> + return m_psyRdoqScale != 0;
> + }
Don't use access methods for an internal member variable; I intend to
remove the similar methods from RDCost. It may be faster to keep a bool
for whether psy-rdoq is enabled, instead of a double comparison
> +
> // transform & inverse transform functions
> - uint32_t transformNxN(TComDataCU* cu, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t log2TrSize,
> + uint32_t transformNxN(TComDataCU* cu, pixel* fenc, uint32_t fencStride, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t log2TrSize,
> TextType ttype, uint32_t absPartIdx, bool useTransformSkip = false, bool curUseRDOQ = true);
>
> void invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
> @@ -207,7 +217,11 @@
> bool m_useRDOQ;
> bool m_scalingListEnabledFlag;
>
> - int32_t* m_tmpCoeff;
> + double m_psyRdoqScale;
> + int32_t* m_tmpCoeff; // tmp buffer which stores the dct of residual
> + int32_t m_tmpfencCoeff[MAX_CU_SIZE * MAX_CU_SIZE]; // tmp buffer which stores the dct of orig pix buffer
> + int16_t m_tmpfencBuf[MAX_CU_SIZE * MAX_CU_SIZE]; // tmp buffer which stores short of orig pixel
These buffers should have better names so they don't require trailing
comments, and use MAX_TS_SIZE.
coeff_t m_resiDctCoeff[MAX_TS_SIZE * MAX_TS_SIZE];
coeff_t m_fencDctCoeff[MAX_TS_SIZE * MAX_TS_SIZE];
int16_t m_fencShortBuf[MAX_TS_SIZE * MAX_TS_SIZE];
> int32_t* m_quantCoef[ScalingList::NUM_SIZES][ScalingList::NUM_LISTS][ScalingList::NUM_REM]; ///< array of quantization matrix coefficient 4x4
> int32_t* m_dequantCoef[ScalingList::NUM_SIZES][ScalingList::NUM_LISTS][ScalingList::NUM_REM]; ///< array of dequantization matrix coefficient 4x4
>
> @@ -221,11 +235,11 @@
> uint32_t xQuant(TComDataCU* cu, int32_t* src, coeff_t* dst, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx);
>
> // RDOQ functions
> - uint32_t xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx);
> + uint32_t xRateDistOptQuant(TComDataCU* cu, int32_t* fencCoeff, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx);
>
> inline uint32_t xGetCodedLevel(double& codedCost, const double curCostSig, double& codedCostSig, int levelDouble,
> uint32_t maxAbsLevel, uint32_t baseLevel, const int *greaterOneBits, const int *levelAbsBits, uint32_t absGoRice,
> - uint32_t c1c2Idx, int qbits, double scale, bool bLast) const;
> + uint32_t c1c2Idx, int qbits, double scale, bool bLast, int sign_coef, int orig_coef) const;
>
> inline double xGetICRateCost(uint32_t absLevel, int32_t diffLevel, const int *greaterOneBits, const int *levelAbsBits, uint32_t absGoRice, uint32_t c1c2Idx) const;
>
> diff -r d303b4d860e9 -r 37e03dcd2e4f source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 21 22:43:38 2014 -0500
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jul 22 17:59:09 2014 +0530
> @@ -438,7 +438,7 @@
<snip TEncSearch diffs>
> diff -r d303b4d860e9 -r 37e03dcd2e4f source/common/param.cpp
> --- a/source/common/param.cpp Mon Jul 21 22:43:38 2014 -0500
> +++ b/source/common/param.cpp Tue Jul 22 17:59:09 2014 +0530
> @@ -163,6 +163,7 @@
> param->crQpOffset = 0;
> param->rdPenalty = 0;
> param->psyRd = 0.0;
> + param->psyRdoq = 0.5;
The patch which is pushed should have the default be 0.0 - enabling the
psycho-visual optimizations by default should be a separate patch.
> param->bIntraInBFrames = 1;
> param->bLossless = 0;
> param->bCULossless = 0;
> @@ -413,11 +414,13 @@
> {
> param->rc.aqStrength = 0.0;
> param->psyRd = 0.0;
> + param->psyRdoq = 0.0;
> }
> else if (!strcmp(tune, "ssim"))
> {
> param->rc.aqMode = X265_AQ_AUTO_VARIANCE;
> param->psyRd = 0.0;
> + param->psyRdoq = 0.0;
> }
> else if (!strcmp(tune, "fastdecode") ||
> !strcmp(tune, "fast-decode"))
Later in this file there are warnings that are emitted if psy-rd is enabled
and SSIM or PSNR are being reported. Those checks need to include
psy-rdoq.
Also, the psy-rdoq scale factor must be logged if it is enabled.
> diff -r d303b4d860e9 -r 37e03dcd2e4f source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Mon Jul 21 22:43:38 2014 -0500
> +++ b/source/encoder/analysis.cpp Tue Jul 22 17:59:09 2014 +0530
> @@ -49,6 +49,7 @@
> {
> m_param = top->m_param;
> m_trQuant.init(top->m_bEnableRDOQ);
> + m_trQuant.setPsyRdoqScale(top->m_param->psyRdoq);
nit: drop top->
> if (!top->m_scalingList.m_bEnabled)
> {
> @@ -1874,7 +1875,7 @@
> primitives.chroma[m_param->internalCsp].sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
>
> // Residual encoding
> - residualTransformQuantInter(cu, 0, m_tmpResiYuv[depth], cu->getDepth(0), true);
> + residualTransformQuantInter(cu, 0, m_origYuv[0], m_tmpResiYuv[depth], cu->getDepth(0), true);
> checkDQP(cu);
>
> if (lcu->getMergeFlag(absPartIdx) && cu->getPartitionSize(0) == SIZE_2Nx2N && !cu->getQtRootCbf(0))
> diff -r d303b4d860e9 -r 37e03dcd2e4f source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Mon Jul 21 22:43:38 2014 -0500
> +++ b/source/encoder/encoder.cpp Tue Jul 22 17:59:09 2014 +0530
> @@ -1296,8 +1296,17 @@
> p->crQpOffset += 6;
> }
>
> - // disable RDOQ if psy-rd is enabled; until we make it psy-aware
> - m_bEnableRDOQ = p->psyRd == 0.0 && p->rdLevel >= 4;
> + // disable RDOQ when rdlevel < 4
> + // disable RDOQ when psyrd is enabled and psyrdoq is disabled
> + if (p->rdLevel >= 4)
> + {
> + if (p->psyRd && !p->psyRdoq)
> + m_bEnableRDOQ = 0;
> + else
> + m_bEnableRDOQ = 1;
> + }
> + else
> + m_bEnableRDOQ = 0;
conciseness is a virtue:
// disable RDOQ if rdlevel < 4 or if psy-rd is enabled without psy-rdoq
m_bEnableRDOQ = p->rdLevel >= 4 ? (p->psyRdoq || !p->psyRd) : 0;
> if (p->bLossless)
> {
> diff -r d303b4d860e9 -r 37e03dcd2e4f source/x265.h
> --- a/source/x265.h Mon Jul 21 22:43:38 2014 -0500
> +++ b/source/x265.h Tue Jul 22 17:59:09 2014 +0530
> @@ -613,6 +613,10 @@
> * energy of the source, at the cost of lost compression. Default 0.0 */
> double psyRd;
>
> + /* Psycho-visual rate-distortion opt quantize strength. Only has an effect in presets
> + * which use RDOQ. Default 0.0 */
white-space
> + double psyRdoq;
And of course, the final patch must bump X265_BUILD, add the CLI option and
param_parse support, and update doc/reST/cli.txt
--
Steve Borho
More information about the x265-devel
mailing list