[x265] quant: returns numSig instead of absSum and lastPos
Satoshi Nakagawa
nakagawa424 at oki.com
Mon Jul 7 10:04:03 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1404720026 -32400
# Mon Jul 07 17:00:26 2014 +0900
# Node ID dcf6f2ce907c59eedc3d488a7f047a5f094bf925
# Parent 11c808e562b894d84961cf00080173321e272884
quant: returns numSig instead of absSum and lastPos
diff -r 11c808e562b8 -r dcf6f2ce907c source/Lib/TLibCommon/CommonDef.h
--- a/source/Lib/TLibCommon/CommonDef.h Thu Jul 03 15:12:45 2014 -0700
+++ b/source/Lib/TLibCommon/CommonDef.h Mon Jul 07 17:00:26 2014 +0900
@@ -118,8 +118,6 @@
#define LOG2_MAX_COLUMN_WIDTH 13
#define LOG2_MAX_ROW_HEIGHT 13
-#define REG_DCT 65535
-
#define CABAC_INIT_PRESENT_FLAG 1
#define MAX_GOP 64 ///< max. value of hierarchical GOP size
diff -r 11c808e562b8 -r dcf6f2ce907c source/Lib/TLibCommon/TComSlice.cpp
--- a/source/Lib/TLibCommon/TComSlice.cpp Thu Jul 03 15:12:45 2014 -0700
+++ b/source/Lib/TLibCommon/TComSlice.cpp Mon Jul 07 17:00:26 2014 +0900
@@ -476,7 +476,6 @@
, m_qpBDOffsetC(0)
, m_bitsForPOC(8)
, m_numLongTermRefPicSPS(0)
- , m_maxTrSize(32)
, m_bUseSAO(false)
, m_bTemporalIdNestingFlag(false)
, m_scalingListEnabledFlag(false)
diff -r 11c808e562b8 -r dcf6f2ce907c source/Lib/TLibCommon/TComSlice.h
--- a/source/Lib/TLibCommon/TComSlice.h Thu Jul 03 15:12:45 2014 -0700
+++ b/source/Lib/TLibCommon/TComSlice.h Mon Jul 07 17:00:26 2014 +0900
@@ -825,9 +825,6 @@
uint32_t m_ltRefPicPocLsbSps[33];
bool m_usedByCurrPicLtSPSFlag[33];
- // Max physical transform size
- uint32_t m_maxTrSize;
-
int m_iAMPAcc[MAX_CU_DEPTH];
bool m_bUseSAO;
@@ -954,11 +951,6 @@
void setTMVPFlagsPresent(bool b) { m_TMVPFlagsPresent = b; }
- // physical transform
- void setMaxTrSize(uint32_t u) { m_maxTrSize = u; }
-
- uint32_t getMaxTrSize() const { return m_maxTrSize; }
-
// AMP accuracy
int getAMPAcc(uint32_t depth) const { return m_iAMPAcc[depth]; }
diff -r 11c808e562b8 -r dcf6f2ce907c source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Thu Jul 03 15:12:45 2014 -0700
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Mon Jul 07 17:00:26 2014 +0900
@@ -143,7 +143,7 @@
}
// To minimize the distortion only. No rate is considered.
-void TComTrQuant::signBitHidingHDQ(coeff_t* qCoef, coeff_t* coef, int32_t* deltaU, const TUEntropyCodingParameters &codingParameters)
+uint32_t TComTrQuant::signBitHidingHDQ(coeff_t* qCoef, coeff_t* coef, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codingParameters)
{
const uint32_t log2TrSizeCG = codingParameters.log2TrSizeCG;
@@ -249,6 +249,11 @@
finalChange = -1;
}
+ if (qCoef[minPos] == 0)
+ numSig++;
+ else if (finalChange == -1 && abs(qCoef[minPos]) == 1)
+ numSig--;
+
if (coef[minPos] >= 0)
{
qCoef[minPos] += finalChange;
@@ -261,12 +266,13 @@
}
lastCG = 0;
} // TU loop
+
+ return numSig;
}
-uint32_t TComTrQuant::xQuant(TComDataCU* cu, int32_t* coef, coeff_t* qCoef, int trSize,
- TextType ttype, uint32_t absPartIdx, int32_t *lastPos)
+uint32_t TComTrQuant::xQuant(TComDataCU* cu, int32_t* coef, coeff_t* qCoef, uint32_t log2TrSize,
+ TextType ttype, uint32_t absPartIdx)
{
- const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
TUEntropyCodingParameters codingParameters;
getTUEntropyCodingParameters(cu, codingParameters, absPartIdx, log2TrSize, ttype);
int deltaU[32 * 32];
@@ -281,13 +287,13 @@
int add = (cu->getSlice()->getSliceType() == I_SLICE ? 171 : 85) << (qbits - 9);
int numCoeff = 1 << log2TrSize * 2;
- uint32_t acSum = primitives.quant(coef, quantCoeff, deltaU, qCoef, qbits, add, numCoeff, lastPos);
+ uint32_t numSig = primitives.quant(coef, quantCoeff, deltaU, qCoef, qbits, add, numCoeff);
- if (acSum >= 2 && cu->getSlice()->getPPS()->getSignHideFlag())
+ if (numSig >= 2 && cu->getSlice()->getPPS()->getSignHideFlag())
{
- signBitHidingHDQ(qCoef, coef, deltaU, codingParameters);
+ return signBitHidingHDQ(qCoef, coef, deltaU, numSig, codingParameters);
}
- return acSum;
+ return numSig;
}
void TComTrQuant::init(bool useRDOQ)
@@ -299,73 +305,65 @@
int16_t* residual,
uint32_t stride,
coeff_t* coeff,
- uint32_t trSize,
+ uint32_t log2TrSize,
TextType ttype,
uint32_t absPartIdx,
- int32_t* lastPos,
bool useTransformSkip,
bool curUseRDOQ)
{
if (cu->getCUTransquantBypass(absPartIdx))
{
- uint32_t absSum = 0;
- for (uint32_t k = 0; k < trSize; k++)
+ uint32_t numSig = 0;
+ int trSize = 1 << log2TrSize;
+ for (int k = 0; k < trSize; k++)
{
- for (uint32_t j = 0; j < trSize; j++)
+ for (int j = 0; j < trSize; j++)
{
coeff[k * trSize + j] = ((int16_t)residual[k * stride + j]);
- absSum += abs(residual[k * stride + j]);
+ numSig += (residual[k * stride + j] != 0);
}
}
- return absSum;
+ return numSig;
}
- uint32_t mode; //luma intra pred
- if (ttype == TEXT_LUMA && cu->getPredictionMode(absPartIdx) == MODE_INTRA)
+ X265_CHECK((cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() >= log2TrSize), "transform size too large\n");
+ if (!useTransformSkip)
{
- mode = cu->getLumaIntraDir(absPartIdx);
+ // TODO: this may need larger data types for X265_DEPTH > 8
+ const uint32_t sizeIdx = log2TrSize - 2;
+ int useDST = (sizeIdx == 0 && ttype == TEXT_LUMA && cu->getPredictionMode(absPartIdx) == MODE_INTRA);
+ int index = DCT_4x4 + sizeIdx - useDST;
+ primitives.dct[index](residual, m_tmpCoeff, stride);
+ if (m_nr->bNoiseReduction)
+ {
+ if (index > 0)
+ {
+ denoiseDct(m_tmpCoeff, m_nr->residualSum[sizeIdx], m_nr->offset[sizeIdx], (16 << sizeIdx * 2));
+ m_nr->count[sizeIdx]++;
+ }
+ }
}
else
{
- mode = REG_DCT;
- }
-
- X265_CHECK((cu->getSlice()->getSPS()->getMaxTrSize() >= trSize), "transform size too large\n");
- if (useTransformSkip)
- {
- xTransformSkip(residual, stride, m_tmpCoeff, trSize);
- }
- else
- {
- // TODO: this may need larger data types for X265_DEPTH > 8
- const uint32_t log2BlockSize = g_convertToBit[trSize];
- primitives.dct[DCT_4x4 + log2BlockSize - ((trSize == 4) && (mode != REG_DCT))](residual, m_tmpCoeff, stride);
- if (m_nr->bNoiseReduction)
- {
- int index = (DCT_4x4 + log2BlockSize - ((trSize == 4) && (mode != REG_DCT)));
- if (index > 0 && index < 5)
- {
- denoiseDct(m_tmpCoeff, m_nr->residualSum[index - 1], m_nr->offset[index - 1], (16 << (index - 1) * 2));
- m_nr->count[index - 1]++;
- }
- }
+ xTransformSkip(residual, stride, m_tmpCoeff, log2TrSize);
}
if (m_useRDOQ && curUseRDOQ)
{
- return xRateDistOptQuant(cu, m_tmpCoeff, coeff, trSize, ttype, absPartIdx, lastPos);
+ return xRateDistOptQuant(cu, m_tmpCoeff, coeff, log2TrSize, ttype, absPartIdx);
}
- return xQuant(cu, m_tmpCoeff, coeff, trSize, ttype, absPartIdx, lastPos);
+ return xQuant(cu, m_tmpCoeff, coeff, log2TrSize, ttype, absPartIdx);
}
-void TComTrQuant::invtransformNxN(bool transQuantBypass, uint32_t mode, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t trSize, int scalingListType, bool useTransformSkip, int lastPos)
+void TComTrQuant::invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig)
{
if (transQuantBypass)
{
- for (uint32_t k = 0; k < trSize; k++)
+ int trSize = 1 << log2TrSize;
+ for (int k = 0; k < trSize; k++)
{
- for (uint32_t j = 0; j < trSize; j++)
+ for (int j = 0; j < trSize; j++)
{
residual[k * stride + j] = (int16_t)(coeff[k * trSize + j]);
}
@@ -377,37 +375,34 @@
// Values need to pass as input parameter in dequant
int per = m_qpParam.m_per;
int rem = m_qpParam.m_rem;
- bool useScalingList = getUseScalingList();
- const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
- int32_t *dequantCoef = getDequantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2);
+ int numCoeff = 1 << log2TrSize * 2;
- if (!useScalingList)
+ if (!getUseScalingList())
{
static const int invQuantScales[6] = { 40, 45, 51, 57, 64, 72 };
int scale = invQuantScales[rem] << per;
- primitives.dequant_normal(coeff, m_tmpCoeff, trSize * trSize, scale, shift);
+ primitives.dequant_normal(coeff, m_tmpCoeff, numCoeff, scale, shift);
}
else
{
// CHECK_ME: the code is not verify since this is DEAD path
- primitives.dequant_scaling(coeff, dequantCoef, m_tmpCoeff, trSize * trSize, per, shift);
+ int scalingListType = (!bIntra ? 3 : 0) + ttype;
+ X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
+ int32_t *dequantCoef = getDequantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2);
+ primitives.dequant_scaling(coeff, dequantCoef, m_tmpCoeff, numCoeff, per, shift);
}
- if (useTransformSkip == true)
+ if (!useTransformSkip)
{
- xITransformSkip(m_tmpCoeff, residual, stride, trSize);
- }
- else
- {
- // CHECK_ME: we can't here when no any coeff
- X265_CHECK(lastPos >= 0, "lastPos negative\n");
+ const uint32_t sizeIdx = log2TrSize - 2;
+ int useDST = (sizeIdx == 0 && ttype == TEXT_LUMA && bIntra);
- const uint32_t log2BlockSize = log2TrSize - 2;
+ X265_CHECK(numSig == primitives.count_nonzero(coeff, 1 << log2TrSize * 2), "numSig differ\n");
// DC only
- if (lastPos == 0 && !((trSize == 4) && (mode != REG_DCT)))
+ if (numSig == 1 && coeff[0] != 0 && !useDST)
{
const int shift_1st = 7;
const int add_1st = 1 << (shift_1st - 1);
@@ -415,13 +410,17 @@
const int add_2nd = 1 << (shift_2nd - 1);
int dc_val = (((m_tmpCoeff[0] * 64 + add_1st) >> shift_1st) * 64 + add_2nd) >> shift_2nd;
- primitives.blockfill_s[log2BlockSize](residual, stride, dc_val);
+ primitives.blockfill_s[sizeIdx](residual, stride, dc_val);
return;
}
// TODO: this may need larger data types for X265_DEPTH > 8
- primitives.idct[IDCT_4x4 + log2BlockSize - ((trSize == 4) && (mode != REG_DCT))](m_tmpCoeff, residual, stride);
+ primitives.idct[IDCT_4x4 + sizeIdx - useDST](m_tmpCoeff, residual, stride);
+ }
+ else
+ {
+ xITransformSkip(m_tmpCoeff, residual, stride, log2TrSize);
}
}
@@ -435,12 +434,10 @@
* \param stride stride of input residual data
* \param size transform size (size x size)
*/
-void TComTrQuant::xTransformSkip(int16_t* resiBlock, uint32_t stride, int32_t* coeff, int trSize)
+void TComTrQuant::xTransformSkip(int16_t* resiBlock, uint32_t stride, int32_t* coeff, uint32_t log2TrSize)
{
- uint32_t log2TrSize = g_convertToBit[trSize] + 2;
- int shift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
- uint32_t transformSkipShift;
- int j, k;
+ int trSize = 1 << log2TrSize;
+ int shift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
if (shift >= 0)
{
@@ -448,15 +445,14 @@
}
else
{
- //The case when X265_DEPTH > 13
- int offset;
- transformSkipShift = -shift;
- offset = (1 << (transformSkipShift - 1));
- for (j = 0; j < trSize; j++)
+ // The case when X265_DEPTH > 13
+ shift = -shift;
+ int offset = (1 << (shift - 1));
+ for (int j = 0; j < trSize; j++)
{
- for (k = 0; k < trSize; k++)
+ for (int k = 0; k < trSize; k++)
{
- coeff[j * trSize + k] = (resiBlock[j * stride + k] + offset) >> transformSkipShift;
+ coeff[j * trSize + k] = (resiBlock[j * stride + k] + offset) >> shift;
}
}
}
@@ -468,11 +464,10 @@
* \param stride stride of input residual data
* \param size transform size (size x size)
*/
-void TComTrQuant::xITransformSkip(int32_t* coef, int16_t* residual, uint32_t stride, int trSize)
+void TComTrQuant::xITransformSkip(int32_t* coef, int16_t* residual, uint32_t stride, uint32_t log2TrSize)
{
- uint32_t log2TrSize = g_convertToBit[trSize] + 2;
- int shift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
- int j, k;
+ int trSize = 1 << log2TrSize;
+ int shift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
if (shift > 0)
{
@@ -480,13 +475,13 @@
}
else
{
- //The case when X265_DEPTH >= 13
- uint32_t transformSkipShift = -shift;
- for (j = 0; j < trSize; j++)
+ // The case when X265_DEPTH >= 13
+ shift = -shift;
+ for (int j = 0; j < trSize; j++)
{
- for (k = 0; k < trSize; k++)
+ for (int k = 0; k < trSize; k++)
{
- residual[j * stride + k] = coef[j * trSize + k] << transformSkipShift;
+ residual[j * stride + k] = coef[j * trSize + k] << shift;
}
}
}
@@ -501,14 +496,14 @@
* \param uiAbsSum reference to absolute sum of quantized transform coefficient
* \param ttype plane type / luminance or chrominance
* \param absPartIdx absolute partition index
- * \returns void
+ * \returns number of significant coefficients
* Rate distortion optimized quantization for entropy
* coding engines using probability models like CABAC
*/
-uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t trSize,
- TextType ttype, uint32_t absPartIdx, int32_t *lastPos)
+uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize,
+ TextType ttype, uint32_t absPartIdx)
{
- const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
+ uint32_t trSize = 1 << log2TrSize;
int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // Represents scaling through forward transform
int scalingListType = (cu->isIntra(absPartIdx) ? 0 : 3) + ttype;
@@ -567,7 +562,7 @@
const uint32_t cgPosX = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
memset(&rdStats, 0, sizeof(coeffGroupRDStats));
- X265_CHECK((trSize >> 2) == (1 << codingParameters.log2TrSizeCG), "transform size invalid\n");
+ X265_CHECK(log2TrSize - 2 == codingParameters.log2TrSizeCG, "transform size invalid\n");
const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
for (int scanPosinCG = cgSize - 1; scanPosinCG >= 0; scanPosinCG--)
{
@@ -845,14 +840,12 @@
} // end if (sigCoeffGroupFlag[ cgBlkPos ])
} // end for
- uint32_t absSum = 0;
+ numSig = 0;
for (int pos = 0; pos < bestLastIdxp1; pos++)
{
int blkPos = codingParameters.scan[pos];
int level = dstCoeff[blkPos];
- absSum += level;
- if (level)
- *lastPos = blkPos;
+ numSig += (level != 0);
uint32_t mask = (int32_t)srcCoeff[blkPos] >> 31;
dstCoeff[blkPos] = (level ^ mask) - mask;
}
@@ -863,7 +856,7 @@
dstCoeff[codingParameters.scan[pos]] = 0;
}
- if (cu->getSlice()->getPPS()->getSignHideFlag() && absSum >= 2)
+ if (cu->getSlice()->getPPS()->getSignHideFlag() && numSig >= 2)
{
int64_t rdFactor = (int64_t)(
g_invQuantScales[m_qpParam.rem()] * g_invQuantScales[m_qpParam.rem()] * (1 << (2 * m_qpParam.m_per))
@@ -901,14 +894,14 @@
if (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD)
{
uint32_t signbit = (dstCoeff[codingParameters.scan[subPos + firstNZPosInCG]] > 0 ? 0 : 1);
- int tmpSum = 0;
+ int absSum = 0;
for (n = firstNZPosInCG; n <= lastNZPosInCG; n++)
{
- tmpSum += dstCoeff[codingParameters.scan[n + subPos]];
+ absSum += dstCoeff[codingParameters.scan[n + subPos]];
}
- if (signbit != (tmpSum & 0x1)) // hide but need tune
+ if (signbit != (absSum & 0x1)) // hide but need tune
{
// calculate the cost
int64_t minCostInc = MAX_INT64, curCost = MAX_INT64;
@@ -974,6 +967,11 @@
finalChange = -1;
}
+ if (dstCoeff[minPos] == 0)
+ numSig++;
+ else if (finalChange == -1 && abs(dstCoeff[minPos]) == 1)
+ numSig--;
+
if (srcCoeff[minPos] >= 0)
{
dstCoeff[minPos] += finalChange;
@@ -988,7 +986,7 @@
}
}
- return absSum;
+ return numSig;
}
/** Pattern decision for context derivation process of significant_coeff_flag
diff -r 11c808e562b8 -r dcf6f2ce907c source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h Thu Jul 03 15:12:45 2014 -0700
+++ b/source/Lib/TLibCommon/TComTrQuant.h Mon Jul 07 17:00:26 2014 +0900
@@ -127,10 +127,10 @@
void init(bool useRDOQ);
// transform & inverse transform functions
- uint32_t transformNxN(TComDataCU* cu, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t trSize,
- TextType ttype, uint32_t absPartIdx, int32_t* lastPos, bool useTransformSkip = false, bool curUseRDOQ = true);
+ uint32_t transformNxN(TComDataCU* cu, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t log2TrSize,
+ TextType ttype, uint32_t absPartIdx, bool useTransformSkip = false, bool curUseRDOQ = true);
- void invtransformNxN(bool transQuantBypass, uint32_t mode, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t trSize, int scalingListType, bool useTransformSkip = false, int lastPos = MAX_INT);
+ void invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
// Misc functions
void setQPforQuant(int qpy, TextType ttype, int qpBdOffset, int chromaQPOffset, int chFmt);
@@ -219,12 +219,13 @@
private:
- void xTransformSkip(int16_t* resiBlock, uint32_t stride, int32_t* coeff, int trSize);
- void signBitHidingHDQ(coeff_t* qcoeff, coeff_t* coeff, int32_t* deltaU, const TUEntropyCodingParameters &codingParameters);
- uint32_t xQuant(TComDataCU* cu, int32_t* src, coeff_t* dst, int trSize, TextType ttype, uint32_t absPartIdx, int32_t *lastPos);
+ void xITransformSkip(int32_t* coeff, int16_t* residual, uint32_t stride, uint32_t log2TrSize);
+ void xTransformSkip(int16_t* resiBlock, uint32_t stride, int32_t* coeff, uint32_t log2TrSize);
+ uint32_t signBitHidingHDQ(coeff_t* qcoeff, coeff_t* coeff, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codingParameters);
+ uint32_t xQuant(TComDataCU* cu, int32_t* src, coeff_t* dst, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx);
// RDOQ functions
- uint32_t xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t trSize, TextType ttype, uint32_t absPartIdx, int32_t *lastPos);
+ uint32_t xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx);
inline uint32_t xGetCodedLevel(double& codedCost, const double curCostSig, double& codedCostSig, int levelDouble,
uint32_t maxAbsLevel, uint32_t baseLevel, const int *greaterOneBits, const int *levelAbsBits, uint32_t absGoRice,
@@ -243,8 +244,6 @@
inline double xGetICost(double rate) const { return m_lambda * rate; } ///< Get the cost for a specific rate
inline uint32_t xGetIEPRate() const { return 32768; } ///< Get the cost of an equal probable bit
-
- void xITransformSkip(int32_t* coeff, int16_t* residual, uint32_t stride, int trSize);
};
}
//! \}
diff -r 11c808e562b8 -r dcf6f2ce907c source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp Thu Jul 03 15:12:45 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp Mon Jul 07 17:00:26 2014 +0900
@@ -2046,7 +2046,7 @@
DTRACE_CABAC_T("\n")
#endif // if ENC_DEC_TRACE
- X265_CHECK(trSize <= m_slice->getSPS()->getMaxTrSize(), "transform size out of range\n");
+ X265_CHECK(log2TrSize <= m_slice->getSPS()->getQuadtreeTULog2MaxSize(), "transform size out of range\n");
// compute number of significant coefficients
uint32_t numSig = primitives.count_nonzero(coeff, (1 << (log2TrSize << 1)));
diff -r 11c808e562b8 -r dcf6f2ce907c source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jul 03 15:12:45 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 07 17:00:26 2014 +0900
@@ -452,22 +452,17 @@
}
//--- transform and quantization ---
- uint32_t absSum;
- int lastPos = -1;
-
int chFmt = cu->getChromaFormat();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
+ uint32_t numSig = m_trQuant->transformNxN(cu, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTransformSkip);
//--- set coded block flag ---
- cbf = absSum ? 1 : 0;
-
- if (absSum)
+ cbf = numSig ? 1 : 0;
+
+ if (numSig)
{
//--- inverse transform ---
- int scalingListType = 0 + TEXT_LUMA;
- X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTransformSkip, numSig);
X265_CHECK(log2TrSize <= 5, "log2TrSize is too large %d\n", log2TrSize);
//===== reconstruction =====
primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
@@ -528,9 +523,6 @@
}
//--- transform and quantization ---
- uint32_t absSum;
- int lastPos = -1;
-
int chFmt = cu->getChromaFormat();
int curChromaQpOffset;
if (ttype == TEXT_CHROMA_U)
@@ -542,18 +534,16 @@
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
}
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipC);
+ uint32_t numSig = m_trQuant->transformNxN(cu, residual, stride, coeff, log2TrSizeC, ttype, absPartIdx, useTransformSkipC);
//--- set coded block flag ---
- cbf = absSum ? 1 : 0;
+ cbf = numSig ? 1 : 0;
uint32_t dist;
- if (absSum)
+ if (numSig)
{
//--- inverse transform ---
- int scalingListType = 0 + ttype;
- X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipC, lastPos);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), residual, stride, coeff, log2TrSizeC, ttype, true, useTransformSkipC, numSig);
X265_CHECK(log2TrSizeC <= 5, "log2TrSizeC is too large %d\n", log2TrSizeC);
//===== reconstruction =====
primitives.calcrecon[sizeIdxC](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
@@ -926,23 +916,18 @@
primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
//===== transform and quantization =====
- uint32_t absSum = 0;
- int lastPos = -1;
-
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
+ uint32_t numSig = m_trQuant->transformNxN(cu, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTransformSkip);
//--- set coded block flag ---
- cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
+ cu->setCbfSubParts((numSig ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
int part = partitionFromSize(tuSize);
- if (absSum)
+ if (numSig)
{
//--- inverse transform ---
- int scalingListType = 0 + TEXT_LUMA;
- X265_CHECK(scalingListType < 6, "scalingListType %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTransformSkip, numSig);
// Generate Recon
primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);
@@ -1432,9 +1417,6 @@
primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
//--- transform and quantization ---
- uint32_t absSum = 0;
- int lastPos = -1;
-
int curChromaQpOffset;
if (ttype == TEXT_CHROMA_U)
{
@@ -1445,17 +1427,15 @@
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
}
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdxC, &lastPos, useTransformSkipC);
+ uint32_t numSig = m_trQuant->transformNxN(cu, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTransformSkipC);
//--- set coded block flag ---
- cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdxC, tuIterator.m_absPartIdxStep);
-
- if (absSum)
+ cu->setCbfPartRange((((numSig > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdxC, tuIterator.m_absPartIdxStep);
+
+ if (numSig)
{
//--- inverse transform ---
- int scalingListType = 0 + ttype;
- X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipC, lastPos);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), residual, stride, coeff, log2TrSizeC, ttype, true, useTransformSkipC, numSig);
//===== reconstruction =====
// use square primitives
@@ -2692,8 +2672,6 @@
X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
// code full block
- uint32_t absSumY = 0, absSumU = 0, absSumV = 0;
- int lastPosY = -1, lastPosU = -1, lastPosV = -1;
if (bCheckFull)
{
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
@@ -2716,8 +2694,6 @@
coeff_t *coeffCurU = cu->getCoeffCb() + coeffOffsetC;
coeff_t *coeffCurV = cu->getCoeffCr() + coeffOffsetC;
- uint32_t trSize = 1 << log2TrSize;
- uint32_t trSizeC = 1 << log2TrSizeC;
uint32_t sizeIdx = log2TrSize - 2;
uint32_t sizeIdxC = log2TrSizeC - 2;
cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
@@ -2729,24 +2705,20 @@
const uint32_t strideResiC = resiYuv->m_cwidth;
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
- absSumY = m_trQuant->transformNxN(cu, curResiY, strideResiY, coeffCurY,
- trSize, TEXT_LUMA, absPartIdx, &lastPosY, false, curuseRDOQ);
-
- cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
-
- if (absSumY)
+ uint32_t numSigY = m_trQuant->transformNxN(cu, curResiY, strideResiY, coeffCurY,
+ log2TrSize, TEXT_LUMA, absPartIdx, false, curuseRDOQ);
+
+ cu->setCbfSubParts(numSigY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
+
+ if (numSigY)
{
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
-
- int scalingListType = 3 + TEXT_LUMA;
- X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, strideResiY, coeffCurY, trSize, scalingListType, false, lastPosY); //this is for inter mode only
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
}
else
{
primitives.blockfill_s[sizeIdx](curResiY, strideResiY, 0);
}
- cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
if (bCodeChroma)
{
@@ -2766,45 +2738,37 @@
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
- absSumU = m_trQuant->transformNxN(cu, curResiU, strideResiC, coeffCurU + subTUOffset,
- trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPosU, false, curuseRDOQ);
+ uint32_t numSigU = m_trQuant->transformNxN(cu, curResiU, strideResiC, coeffCurU + subTUOffset,
+ log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false, curuseRDOQ);
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
- absSumV = m_trQuant->transformNxN(cu, curResiV, strideResiC, coeffCurV + subTUOffset,
- trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPosV, false, curuseRDOQ);
-
- cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
- cu->setCbfPartRange(absSumV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
-
- if (absSumU)
+ uint32_t numSigV = m_trQuant->transformNxN(cu, curResiV, strideResiC, coeffCurV + subTUOffset,
+ log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false, curuseRDOQ);
+
+ cu->setCbfPartRange(numSigU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
+ cu->setCbfPartRange(numSigV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
+
+ if (numSigU)
{
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
-
- int scalingListType = 3 + TEXT_CHROMA_U;
- X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUOffset, trSizeC, scalingListType, false, lastPosU);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
}
else
{
primitives.blockfill_s[sizeIdxC](curResiU, strideResiC, 0);
}
- if (absSumV)
+ if (numSigV)
{
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
-
- int scalingListType = 3 + TEXT_CHROMA_V;
- X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUOffset, trSizeC, scalingListType, false, lastPosV);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
}
else
{
primitives.blockfill_s[sizeIdxC](curResiV, strideResiC, 0);
}
- cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
- cu->setCbfPartRange(absSumV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
}
while (isNextSection(&tuIterator));
@@ -2894,9 +2858,8 @@
uint32_t singleBitsComp[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
uint32_t singleDistComp[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
uint32_t singlePsyEnergyComp[MAX_NUM_COMPONENT][2] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
- uint32_t absSum[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
+ uint32_t numSigY = 0;
uint32_t bestTransformMode[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
- int lastPos[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { -1, -1 }, { -1, -1 }, { -1, -1 } };
uint64_t minCost[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/];
uint32_t bestCBF[MAX_NUM_COMPONENT];
@@ -2910,6 +2873,8 @@
// code full block
if (bCheckFull)
{
+ uint32_t numSigU[2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { 0, 0 };
+ uint32_t numSigV[2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { 0, 0 };
uint32_t trSizeC = 1 << log2TrSizeC;
int sizeIdx = log2TrSize - 2;
int sizeIdxC = log2TrSizeC - 2;
@@ -2933,14 +2898,14 @@
}
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
- absSum[TEXT_LUMA][0] = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
- trSize, TEXT_LUMA, absPartIdx, &lastPos[TEXT_LUMA][0], false, curuseRDOQ);
-
- cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
+ numSigY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
+ log2TrSize, TEXT_LUMA, absPartIdx, false, curuseRDOQ);
+
+ cu->setCbfSubParts(numSigY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
m_entropyCoder->resetBits();
m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
- if (absSum[TEXT_LUMA][0])
+ if (numSigY)
m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
singleBitsComp[TEXT_LUMA][0] = m_entropyCoder->getNumberOfWrittenBits();
@@ -2966,24 +2931,24 @@
//Cb transform
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
- absSum[TEXT_CHROMA_U][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUOffset,
- trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPos[TEXT_CHROMA_U][tuIterator.m_section], false, curuseRDOQ);
+ numSigU[tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUOffset,
+ log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false, curuseRDOQ);
//Cr transform
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
- absSum[TEXT_CHROMA_V][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUOffset,
- trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPos[TEXT_CHROMA_V][tuIterator.m_section], false, curuseRDOQ);
-
- cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
- cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
+ numSigV[tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUOffset,
+ log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false, curuseRDOQ);
+
+ cu->setCbfPartRange(numSigU[tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
+ cu->setCbfPartRange(numSigV[tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_U, trMode);
- if (absSum[TEXT_CHROMA_U][tuIterator.m_section])
+ if (numSigU[tuIterator.m_section])
m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUOffset, absPartIdxC, log2TrSizeC, TEXT_CHROMA_U);
singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits() - singleBitsPrev;
m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_V, trMode);
- if (absSum[TEXT_CHROMA_V][tuIterator.m_section])
+ if (numSigV[tuIterator.m_section])
m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUOffset, absPartIdxC, log2TrSizeC, TEXT_CHROMA_V);
uint32_t newBits = m_entropyCoder->getNumberOfWrittenBits();
singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section] = newBits - (singleBitsPrev + singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
@@ -3021,13 +2986,10 @@
{
*outZeroDist += distY;
}
- if (absSum[TEXT_LUMA][0])
+ if (numSigY)
{
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
-
- int scalingListType = 3 + TEXT_LUMA;
- X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, strideResiY, coeffCurY, trSize, scalingListType, false, lastPos[TEXT_LUMA][0]); //this is for inter mode only
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY); //this is for inter mode only
const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, curResiY, strideResiY);
uint32_t nonZeroPsyEnergyY = 0;
@@ -3066,7 +3028,7 @@
nullCostY = m_rdCost->calcRdCost(distY, nullBitsY);
if (nullCostY < singleCostY)
{
- absSum[TEXT_LUMA][0] = 0;
+ numSigY = 0;
#if CHECKED_BUILD || _DEBUG
::memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY);
#endif
@@ -3099,11 +3061,11 @@
singleDistComp[TEXT_LUMA][0] = distY;
singlePsyEnergyComp[TEXT_LUMA][0] = psyEnergyY;
- if (!absSum[TEXT_LUMA][0])
+ if (!numSigY)
{
primitives.blockfill_s[sizeIdx](curResiY, strideResiY, 0);
}
- cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
+ cu->setCbfSubParts(numSigY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
uint32_t distU = 0;
uint32_t distV = 0;
@@ -3130,19 +3092,17 @@
{
*outZeroDist += distU;
}
- if (absSum[TEXT_CHROMA_U][tuIterator.m_section])
+ if (numSigU[tuIterator.m_section])
{
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
-
- int scalingListType = 3 + TEXT_CHROMA_U;
- X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUOffset,
- trSizeC, scalingListType, false, lastPos[TEXT_CHROMA_U][tuIterator.m_section]);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiU, strideResiC, coeffCurU + subTUOffset,
+ log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU[tuIterator.m_section]);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
curResiU, strideResiC);
const uint32_t nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
- uint32_t nonZeroPsyEnergyU = 0;
+ uint32_t nonZeroPsyEnergyU = 0;
+
if (m_rdCost->psyRdEnabled())
{
pixel* pred = predYuv->getCbAddr(absPartIdxC);
@@ -3178,7 +3138,7 @@
nullCostU = m_rdCost->calcRdCost(distU, nullBitsU);
if (nullCostU < singleCostU)
{
- absSum[TEXT_CHROMA_U][tuIterator.m_section] = 0;
+ numSigU[tuIterator.m_section] = 0;
#if CHECKED_BUILD || _DEBUG
::memset(coeffCurU + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
#endif
@@ -3212,7 +3172,7 @@
singleDistComp[TEXT_CHROMA_U][tuIterator.m_section] = distU;
singlePsyEnergyComp[TEXT_CHROMA_U][tuIterator.m_section] = psyEnergyU;
- if (!absSum[TEXT_CHROMA_U][tuIterator.m_section])
+ if (!numSigU[tuIterator.m_section])
{
primitives.blockfill_s[sizeIdxC](curResiU, strideResiC, 0);
}
@@ -3222,15 +3182,12 @@
{
*outZeroDist += distV;
}
- if (absSum[TEXT_CHROMA_V][tuIterator.m_section])
+ if (numSigV[tuIterator.m_section])
{
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
-
- int scalingListType = 3 + TEXT_CHROMA_V;
- X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUOffset,
- trSizeC, scalingListType, false, lastPos[TEXT_CHROMA_V][tuIterator.m_section]);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiV, strideResiC, coeffCurV + subTUOffset,
+ log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV[tuIterator.m_section]);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
curResiV, strideResiC);
const uint32_t nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
@@ -3271,7 +3228,7 @@
nullCostV = m_rdCost->calcRdCost(distV, nullBitsV);
if (nullCostV < singleCostV)
{
- absSum[TEXT_CHROMA_V][tuIterator.m_section] = 0;
+ numSigV[tuIterator.m_section] = 0;
#if CHECKED_BUILD || _DEBUG
::memset(coeffCurV + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
#endif
@@ -3305,21 +3262,20 @@
singleDistComp[TEXT_CHROMA_V][tuIterator.m_section] = distV;
singlePsyEnergyComp[TEXT_CHROMA_V][tuIterator.m_section] = psyEnergyV;
- if (!absSum[TEXT_CHROMA_V][tuIterator.m_section])
+ if (!numSigV[tuIterator.m_section])
{
primitives.blockfill_s[sizeIdxC](curResiV, strideResiC, 0);
}
- cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
- cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
+ cu->setCbfPartRange(numSigU[tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
+ cu->setCbfPartRange(numSigV[tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
}
while (isNextSection(&tuIterator));
}
- int lastPosTransformSkip[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { -1, -1 }, { -1, -1 }, { -1, -1 } };
if (checkTransformSkipY)
{
- uint32_t nonZeroDistY = 0, absSumTransformSkipY;
+ uint32_t nonZeroDistY = 0;
uint32_t nonZeroPsyEnergyY = 0;
uint64_t singleCostY = MAX_INT64;
@@ -3336,11 +3292,11 @@
}
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
- absSumTransformSkipY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, tsCoeffY,
- trSize, TEXT_LUMA, absPartIdx, &lastPosTransformSkip[TEXT_LUMA][0], true, curuseRDOQ);
- cu->setCbfSubParts(absSumTransformSkipY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
-
- if (absSumTransformSkipY)
+ uint32_t numSigTSkipY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, tsCoeffY,
+ log2TrSize, TEXT_LUMA, absPartIdx, true, curuseRDOQ);
+ cu->setCbfSubParts(numSigTSkipY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
+
+ if (numSigTSkipY)
{
m_entropyCoder->resetBits();
m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
@@ -3348,11 +3304,7 @@
const uint32_t skipSingleBitsY = m_entropyCoder->getNumberOfWrittenBits();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
-
- int scalingListType = 3 + TEXT_LUMA;
- X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
-
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, tsResiY, trSize, tsCoeffY, trSize, scalingListType, true, lastPosTransformSkip[TEXT_LUMA][0]);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width,
tsResiY, trSize);
@@ -3375,7 +3327,7 @@
singleCostY = m_rdCost->calcRdCost(nonZeroDistY, skipSingleBitsY);
}
- if (!absSumTransformSkipY || minCost[TEXT_LUMA][0] < singleCostY)
+ if (!numSigTSkipY || minCost[TEXT_LUMA][0] < singleCostY)
{
cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
}
@@ -3383,18 +3335,18 @@
{
singleDistComp[TEXT_LUMA][0] = nonZeroDistY;
singlePsyEnergyComp[TEXT_LUMA][0] = nonZeroPsyEnergyY;
- absSum[TEXT_LUMA][0] = absSumTransformSkipY;
+ numSigY = numSigTSkipY;
bestTransformMode[TEXT_LUMA][0] = 1;
memcpy(coeffCurY, tsCoeffY, sizeof(coeff_t) * numCoeffY);
primitives.square_copy_ss[sizeIdx](curResiY, strideResiY, tsResiY, trSize);
}
- cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
+ cu->setCbfSubParts(numSigY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
}
if (bCodeChroma && checkTransformSkipUV)
{
- uint32_t nonZeroDistU = 0, nonZeroDistV = 0, absSumTransformSkipU, absSumTransformSkipV;
+ uint32_t nonZeroDistU = 0, nonZeroDistV = 0;
uint32_t nonZeroPsyEnergyU = 0, nonZeroPsyEnergyV = 0;
uint64_t singleCostU = MAX_INT64;
uint64_t singleCostV = MAX_INT64;
@@ -3429,20 +3381,20 @@
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
- absSumTransformSkipU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, tsCoeffU,
- trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section], true, curuseRDOQ);
+ uint32_t numSigTSkipU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, tsCoeffU,
+ log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, true, curuseRDOQ);
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
- absSumTransformSkipV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, tsCoeffV,
- trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section], true, curuseRDOQ);
-
- cu->setCbfPartRange(absSumTransformSkipU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
- cu->setCbfPartRange(absSumTransformSkipV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
+ uint32_t numSigTSkipV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, tsCoeffV,
+ log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, true, curuseRDOQ);
+
+ cu->setCbfPartRange(numSigTSkipU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
+ cu->setCbfPartRange(numSigTSkipV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
m_entropyCoder->resetBits();
singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] = 0;
- if (absSumTransformSkipU)
+ if (numSigTSkipU)
{
m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_U, trMode);
m_entropyCoder->encodeCoeffNxN(cu, tsCoeffU, absPartIdxC, log2TrSizeC, TEXT_CHROMA_U);
@@ -3450,11 +3402,8 @@
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
-
- int scalingListType = 3 + TEXT_CHROMA_U;
- X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, tsResiU, trSizeC, tsCoeffU,
- trSizeC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section]);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), tsResiU, trSizeC, tsCoeffU,
+ log2TrSizeC, TEXT_CHROMA_U, false, true, numSigTSkipU);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
tsResiU, trSizeC);
nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
@@ -3476,7 +3425,7 @@
singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
}
- if (!absSumTransformSkipU || minCost[TEXT_CHROMA_U][tuIterator.m_section] < singleCostU)
+ if (!numSigTSkipU || minCost[TEXT_CHROMA_U][tuIterator.m_section] < singleCostU)
{
cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
}
@@ -3484,13 +3433,13 @@
{
singleDistComp[TEXT_CHROMA_U][tuIterator.m_section] = nonZeroDistU;
singlePsyEnergyComp[TEXT_CHROMA_U][tuIterator.m_section] = nonZeroPsyEnergyU;
- absSum[TEXT_CHROMA_U][tuIterator.m_section] = absSumTransformSkipU;
+ numSigU[tuIterator.m_section] = numSigTSkipU;
bestTransformMode[TEXT_CHROMA_U][tuIterator.m_section] = 1;
memcpy(coeffCurU + subTUOffset, tsCoeffU, sizeof(coeff_t) * numCoeffC);
primitives.square_copy_ss[sizeIdxC](curResiU, strideResiC, tsResiU, trSizeC);
}
- if (absSumTransformSkipV)
+ if (numSigTSkipV)
{
m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_V, trMode);
m_entropyCoder->encodeCoeffNxN(cu, tsCoeffV, absPartIdxC, log2TrSizeC, TEXT_CHROMA_V);
@@ -3498,11 +3447,8 @@
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
-
- int scalingListType = 3 + TEXT_CHROMA_V;
- X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, tsResiV, trSizeC, tsCoeffV,
- trSizeC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section]);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), tsResiV, trSizeC, tsCoeffV,
+ log2TrSizeC, TEXT_CHROMA_V, false, true, numSigTSkipV);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
tsResiV, trSizeC);
nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
@@ -3524,7 +3470,7 @@
singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);
}
- if (!absSumTransformSkipV || minCost[TEXT_CHROMA_V][tuIterator.m_section] < singleCostV)
+ if (!numSigTSkipV || minCost[TEXT_CHROMA_V][tuIterator.m_section] < singleCostV)
{
cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
}
@@ -3532,14 +3478,14 @@
{
singleDistComp[TEXT_CHROMA_V][tuIterator.m_section] = nonZeroDistV;
singlePsyEnergyComp[TEXT_CHROMA_V][tuIterator.m_section] = nonZeroPsyEnergyV;
- absSum[TEXT_CHROMA_V][tuIterator.m_section] = absSumTransformSkipV;
+ numSigV[tuIterator.m_section] = numSigTSkipV;
bestTransformMode[TEXT_CHROMA_V][tuIterator.m_section] = 1;
memcpy(coeffCurV + subTUOffset, tsCoeffV, sizeof(coeff_t) * numCoeffC);
primitives.square_copy_ss[sizeIdxC](curResiV, strideResiC, tsResiV, trSizeC);
}
- cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
- cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
+ cu->setCbfPartRange(numSigU[tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
+ cu->setCbfPartRange(numSigV[tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
}
while (isNextSection(&tuIterator));
@@ -3568,16 +3514,16 @@
}
m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
- if (absSum[TEXT_LUMA][0])
+ if (numSigY)
m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
if (bCodeChroma)
{
if (!splitIntoSubTUs)
{
- if (absSum[TEXT_CHROMA_U][0])
+ if (numSigU[0])
m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
- if (absSum[TEXT_CHROMA_V][0])
+ if (numSigV[0])
m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
}
else
@@ -3585,13 +3531,13 @@
uint32_t subTUSize = 1 << (log2TrSizeC * 2);
uint32_t partIdxesPerSubTU = absPartIdxStep >> 1;
- if (absSum[TEXT_CHROMA_U][0])
+ if (numSigU[0])
m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
- if (absSum[TEXT_CHROMA_U][1])
+ if (numSigU[1])
m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_U);
- if (absSum[TEXT_CHROMA_V][0])
+ if (numSigV[0])
m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
- if (absSum[TEXT_CHROMA_V][1])
+ if (numSigV[1])
m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_V);
}
}
@@ -3734,7 +3680,7 @@
cu->m_psyEnergy = singlePsyEnergy;
cu->setTrIdxSubParts(trMode, absPartIdx, depth);
- cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
+ cu->setCbfSubParts(numSigY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
if (bCodeChroma)
{
diff -r 11c808e562b8 -r dcf6f2ce907c source/common/dct.cpp
--- a/source/common/dct.cpp Thu Jul 03 15:12:45 2014 -0700
+++ b/source/common/dct.cpp Mon Jul 07 17:00:26 2014 +0900
@@ -773,10 +773,10 @@
}
}
-uint32_t quant_c(int32_t* coef, int32_t* quantCoeff, int32_t* deltaU, int32_t* qCoef, int qBits, int add, int numCoeff, int32_t* lastPos)
+uint32_t quant_c(int32_t* coef, int32_t* quantCoeff, int32_t* deltaU, int32_t* qCoef, int qBits, int add, int numCoeff)
{
int qBits8 = qBits - 8;
- uint32_t acSum = 0;
+ uint32_t numSig = 0;
for (int blockpos = 0; blockpos < numCoeff; blockpos++)
{
@@ -785,15 +785,14 @@
int tmplevel = abs(level) * quantCoeff[blockpos];
level = ((tmplevel + add) >> qBits);
+ deltaU[blockpos] = ((tmplevel - (level << qBits)) >> qBits8);
if (level)
- *lastPos = blockpos;
- deltaU[blockpos] = ((tmplevel - (level << qBits)) >> qBits8);
- acSum += level;
+ ++numSig;
level *= sign;
qCoef[blockpos] = Clip3(-32768, 32767, level);
}
- return acSum;
+ return numSig;
}
uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* scaledCoeff, int32_t* qCoef, int qBits, int add, int numCoeff)
diff -r 11c808e562b8 -r dcf6f2ce907c source/common/primitives.h
--- a/source/common/primitives.h Thu Jul 03 15:12:45 2014 -0700
+++ b/source/common/primitives.h Mon Jul 07 17:00:26 2014 +0900
@@ -146,7 +146,7 @@
typedef void (*calcresidual_t)(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
-typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
+typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
diff -r 11c808e562b8 -r dcf6f2ce907c source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Thu Jul 03 15:12:45 2014 -0700
+++ b/source/common/x86/pixel-util.h Mon Jul 07 17:00:26 2014 +0900
@@ -44,7 +44,7 @@
void x265_transpose32_sse2(pixel *dest, pixel *src, intptr_t stride);
void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride);
-uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
+uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
int x265_count_nonzero_ssse3(const int32_t *quantCoeff, int numCoeff);
diff -r 11c808e562b8 -r dcf6f2ce907c source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Thu Jul 03 15:12:45 2014 -0700
+++ b/source/common/x86/pixel-util8.asm Mon Jul 07 17:00:26 2014 +0900
@@ -27,8 +27,6 @@
SECTION_RODATA 32
-c_d_4: dd 4, 4, 4, 4
-c_d_1234: dd 1, 2, 3, 4
%if BIT_DEPTH == 10
ssim_c1: times 4 dd 6697.7856 ; .01*.01*1023*1023*64
ssim_c2: times 4 dd 3797644.4352 ; .03*.03*1023*1023*64*63
@@ -864,42 +862,25 @@
;-----------------------------------------------------------------------------
-; uint32_t quant(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
+; uint32_t quant(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
;-----------------------------------------------------------------------------
INIT_XMM sse4
-%if ARCH_X86_64 == 1
-cglobal quant, 5,6,11
- %define addVec m8
- %define qbits m9
- %define qbits8 m10
-%else
-cglobal quant, 5,6,8, 0-(3*mmsize)
- %define addVec [rsp + 0 * mmsize]
- %define qbits [rsp + 1 * mmsize]
- %define qbits8 [rsp + 2 * mmsize]
-%endif
+cglobal quant, 5,6,8
; fill qbits
- movd m0, r4d
- mova qbits, m0
+ movd m4, r4d ; m4 = qbits
; fill qbits-8
sub r4d, 8
- movd m0, r4d
- mova qbits8, m0
+ movd m6, r4d ; m6 = qbits8
; fill offset
- mov r4d, r5m
- movd m0, r4d
- pshufd m0, m0, 0
- mova addVec, m0
+ movd m5, r5m
+ pshufd m5, m5, 0 ; m5 = add
mov r4d, r6m
shr r4d, 3
- pxor m7, m7 ; m7 = acSum4
- mova m6, [c_d_1234] ; m6 = last4
- pxor m5, m5 ; m5 = count
- mova m4, [c_d_4] ; m4 = [4 4 4 4]
+ pxor m7, m7 ; m7 = numZero
.loop:
; 4 coeff
movu m0, [r0] ; m0 = level
@@ -908,19 +889,15 @@
movu m2, [r1] ; m2 = qcoeff
pabsd m0, m0
pmulld m0, m2 ; m0 = tmpLevel1
- paddd m2, m0, addVec
- psrad m2, qbits ; m2 = level1
- paddd m7, m2
- pslld m3, m2, qbits
+ paddd m2, m0, m5
+ psrad m2, m4 ; m2 = level1
+ pslld m3, m2, m4
psubd m0, m3
- psrad m0, qbits8 ; m0 = deltaU1
+ psrad m0, m6 ; m0 = deltaU1
movu [r2], m0
pxor m0, m0
pcmpeqd m0, m2 ; m0 = mask4
- pand m5, m0
- pandn m0, m6
- por m5, m0
- paddd m6, m4
+ psubd m7, m0
pxor m2, m1
psubd m2, m1
@@ -934,19 +911,15 @@
movu m2, [r1 + 16] ; m2 = qcoeff
pabsd m0, m0
pmulld m0, m2 ; m0 = tmpLevel1
- paddd m2, m0, addVec
- psrad m2, qbits ; m2 = level1
- paddd m7, m2
- pslld m3, m2, qbits
+ paddd m2, m0, m5
+ psrad m2, m4 ; m2 = level1
+ pslld m3, m2, m4
psubd m0, m3
- psrad m0, qbits8 ; m0 = deltaU1
+ psrad m0, m6 ; m0 = deltaU1
movu [r2 + 16], m0
pxor m0, m0
pcmpeqd m0, m2 ; m0 = mask4
- pand m5, m0
- pandn m0, m6
- por m5, m0
- paddd m6, m4
+ psubd m7, m0
pxor m2, m1
psubd m2, m1
@@ -962,18 +935,11 @@
dec r4d
jnz .loop
- movhlps m4, m5
- pmaxud m4, m5
- pshufd m5, m4, 1
- pmaxud m4, m5
-
- mov r4, r7m
- movd [r4], m4
- dec dword [r4]
-
phaddd m7, m7
phaddd m7, m7
- movd eax, m7
+ mov eax, r6m
+ movd r4d, m7
+ sub eax, r4d ; numSig
RET
@@ -985,11 +951,11 @@
cglobal nquant, 5,6,8
; fill qbits
- movd m5, r4d ; m5 = qbits
+ movd m4, r4d ; m4 = qbits
; fill offset
- movd m6, r5m
- pshufd m6, m6, 0 ; m6 = add
+ movd m5, r5m
+ pshufd m5, m5, 0 ; m5 = add
mov r4d, r6m
shr r4d, 3
@@ -1003,10 +969,11 @@
pabsd m0, m0
pmulld m0, m2 ; m0 = tmpLevel1
movu [r2], m0 ; m0 = scaledCoeff
- paddd m2, m0, m6
- psrad m2, m5 ; m2 = level1
- pxor m4, m4
- pcmpeqd m4, m2 ; m4 = mask4
+ paddd m2, m0, m5
+ psrad m2, m4 ; m2 = level1
+ pxor m0, m0
+ pcmpeqd m0, m2 ; m0 = mask4
+ psubd m7, m0
pxor m2, m1
psubd m2, m1
@@ -1021,10 +988,11 @@
pabsd m0, m0
pmulld m0, m2 ; m0 = tmpLevel1
movu [r2 + 16], m0 ; m0 = scaledCoeff
- paddd m2, m0, m6
- psrad m2, m5 ; m2 = level1
+ paddd m2, m0, m5
+ psrad m2, m4 ; m2 = level1
pxor m0, m0
pcmpeqd m0, m2 ; m0 = mask4
+ psubd m7, m0
pxor m2, m1
psubd m2, m1
@@ -1032,9 +1000,6 @@
pmovsxwd m2, m2
movu [r3 + 16], m2
- packssdw m4, m0 ; m4 = mask8
- psubw m7, m4 ; m7 = numZero
-
add r0, 32
add r1, 32
add r2, 32
@@ -1043,11 +1008,10 @@
dec r4d
jnz .loop
- packuswb m7, m7
- pxor m0, m0
- psadbw m0, m7
+ phaddd m7, m7
+ phaddd m7, m7
mov eax, r6m
- movd r4d, m0
+ movd r4d, m7
sub eax, r4d ; numSig
RET
diff -r 11c808e562b8 -r dcf6f2ce907c source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Thu Jul 03 15:12:45 2014 -0700
+++ b/source/encoder/encoder.cpp Mon Jul 07 17:00:26 2014 +0900
@@ -1052,8 +1052,6 @@
sps->setTMVPFlagsPresent(false);
- sps->setMaxTrSize(1 << m_quadtreeTULog2MaxSize);
-
for (uint32_t i = 0; i < g_maxCUDepth - g_addCUDepth; i++)
{
sps->setAMPAcc(i, m_param->bEnableAMP);
diff -r 11c808e562b8 -r dcf6f2ce907c source/test/mbdstharness.cpp
--- a/source/test/mbdstharness.cpp Thu Jul 03 15:12:45 2014 -0700
+++ b/source/test/mbdstharness.cpp Mon Jul 07 17:00:26 2014 +0900
@@ -300,13 +300,12 @@
int valueToAdd = rand() % (32 * 1024);
int cmp_size = sizeof(int) * height * width;
int numCoeff = height * width;
- int optLastPos = -1, refLastPos = -1;
int index1 = rand() % TEST_CASES;
int index2 = rand() % TEST_CASES;
- refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf5, mintbuf6, bits, valueToAdd, numCoeff, &refLastPos);
- optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf3, mintbuf4, bits, valueToAdd, numCoeff, &optLastPos);
+ refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf5, mintbuf6, bits, valueToAdd, numCoeff);
+ optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf3, mintbuf4, bits, valueToAdd, numCoeff);
if (memcmp(mintbuf3, mintbuf5, cmp_size))
return false;
@@ -317,9 +316,6 @@
if (optReturnValue != refReturnValue)
return false;
- if (optLastPos != refLastPos)
- return false;
-
reportfail();
j += 16;
}
@@ -509,8 +505,7 @@
if (opt.quant)
{
printf("quant\t\t");
- int dummy = -1;
- REPORT_SPEEDUP(opt.quant, ref.quant, mintbuf1, mintbuf2, mintbuf3, mintbuf4, 23, 23785, 32 * 32, &dummy);
+ REPORT_SPEEDUP(opt.quant, ref.quant, mintbuf1, mintbuf2, mintbuf3, mintbuf4, 23, 23785, 32 * 32);
}
if (opt.nquant)
More information about the x265-devel
mailing list