[x265-commits] [x265] tune: ensure lookahead is disabled for zero-latency
Steve Borho
steve at borho.org
Sat Mar 1 06:35:33 CET 2014
details: http://hg.videolan.org/x265/rev/61c752e11424
branches:
changeset: 6329:61c752e11424
user: Steve Borho <steve at borho.org>
date: Thu Feb 27 20:32:18 2014 -0600
description:
tune: ensure lookahead is disabled for zero-latency
Subject: [x265] asm: enable count_nonzero for HIGH_BIT_DEPTH
details: http://hg.videolan.org/x265/rev/df831b319c08
branches:
changeset: 6330:df831b319c08
user: Satoshi Nakagawa <nakagawa424 at oki.com>
date: Fri Feb 28 10:40:17 2014 +0900
description:
asm: enable count_nonzero for HIGH_BIT_DEPTH
Subject: [x265] square transform only
details: http://hg.videolan.org/x265/rev/9b43c262124e
branches:
changeset: 6331:9b43c262124e
user: Satoshi Nakagawa <nakagawa424 at oki.com>
date: Fri Feb 28 11:12:57 2014 +0900
description:
square transform only
Subject: [x265] api: make log-level 4 semi-official and expose in public API
details: http://hg.videolan.org/x265/rev/994f046a8111
branches: stable
changeset: 6332:994f046a8111
user: Steve Borho <steve at borho.org>
date: Thu Feb 27 21:40:52 2014 -0600
description:
api: make log-level 4 semi-official and expose in public API
Subject: [x265] tcompicyuv: initialize cu and bu offset buffers to NULL.
details: http://hg.videolan.org/x265/rev/f6d079ad85bc
branches: stable
changeset: 6333:f6d079ad85bc
user: Gopu Govindaswamy
date: Fri Feb 28 13:00:01 2014 -0800
description:
tcompicyuv: initialize cu and bu offset buffers to NULL.
Subject: [x265] Merge with stable
details: http://hg.videolan.org/x265/rev/5e9559d366b3
branches:
changeset: 6334:5e9559d366b3
user: Steve Borho <steve at borho.org>
date: Fri Feb 28 01:41:43 2014 -0600
description:
Merge with stable
Subject: [x265] weightp: use struct to cache data for reuse, refactor MC of reference planes
details: http://hg.videolan.org/x265/rev/518313140b03
branches:
changeset: 6335:518313140b03
user: Kavitha Sampath <kavitha at multicorewareinc.com>
date: Fri Feb 28 12:28:22 2014 +0530
description:
weightp: use struct to cache data for reuse, refactor MC of reference planes
* do not consider intra/mv cost during MC phase
* unconditionally motion-compensate luma and chroma blocks
* include slice header cost estimate in weight analysis
* weightCost() needed different paths for luma, chroma, and chroma444
* pass a single stride to weightCost()
Subject: [x265] asm: split SAO_EO_0 into separate primitive func
details: http://hg.videolan.org/x265/rev/000f86d72337
branches:
changeset: 6336:000f86d72337
user: Praveen Tiwari
date: Fri Feb 28 12:17:17 2014 +0530
description:
asm: split SAO_EO_0 into separate primitive func
added assembly code and testbench support
added loopfilter.cpp, loopfilter.h, loopfilter.asm files
diffstat:
source/Lib/TLibCommon/TComPicYuv.cpp | 5 +
source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp | 66 +-
source/Lib/TLibCommon/TComSampleAdaptiveOffset.h | 4 +-
source/Lib/TLibCommon/TComTrQuant.cpp | 99 +-
source/Lib/TLibCommon/TComTrQuant.h | 12 +-
source/Lib/TLibEncoder/TEncSearch.cpp | 52 +-
source/common/CMakeLists.txt | 11 +-
source/common/bitstream.h | 70 ++
source/common/common.cpp | 3 +
source/common/loopfilter.cpp | 52 +
source/common/param.cpp | 6 +-
source/common/primitives.cpp | 4 +-
source/common/primitives.h | 5 +-
source/common/x86/asm-primitives.cpp | 4 +
source/common/x86/loopfilter.asm | 85 +++
source/common/x86/loopfilter.h | 29 +
source/encoder/encoder.cpp | 2 +-
source/encoder/weightPrediction.cpp | 578 +++++++++++---------
source/test/pixelharness.cpp | 52 +-
source/test/pixelharness.h | 3 +-
source/x265.h | 3 +-
21 files changed, 765 insertions(+), 380 deletions(-)
diffs (truncated from 1998 to 300 lines):
diff -r 8189f9e9a39f -r 000f86d72337 source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp Thu Feb 27 19:05:54 2014 -0600
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp Fri Feb 28 12:17:17 2014 +0530
@@ -57,6 +57,11 @@ TComPicYuv::TComPicYuv()
m_picOrgY = NULL; // m_apiPicBufY + m_iMarginLuma*getStride() + m_iMarginLuma
m_picOrgU = NULL;
m_picOrgV = NULL;
+
+ m_cuOffsetY = NULL;
+ m_cuOffsetC = NULL;
+ m_buOffsetY = NULL;
+ m_buOffsetC = NULL;
}
TComPicYuv::~TComPicYuv()
diff -r 8189f9e9a39f -r 000f86d72337 source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp
--- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp Thu Feb 27 19:05:54 2014 -0600
+++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp Fri Feb 28 12:17:17 2014 +0530
@@ -44,7 +44,6 @@
namespace x265 {
//! \ingroup TLibCommon
//! \{
-
SAOParam::~SAOParam()
{
for (int i = 0; i < 3; i++)
@@ -535,12 +534,10 @@ void TComSampleAdaptiveOffset::processSa
uint32_t tpely = tmpCu->getCUPelY();
uint32_t rpelx;
uint32_t bpely;
- int signLeft;
- int signRight;
+ int edgeType;
int signDown;
int signDown1;
int signDown2;
- uint32_t edgeType;
int picWidthTmp;
int picHeightTmp;
int startX;
@@ -614,23 +611,56 @@ void TComSampleAdaptiveOffset::processSa
{
case SAO_EO_0: // dir: -
{
- startX = (lpelx == 0) ? 1 : 0;
- endX = (rpelx == picWidthTmp) ? lcuWidth - 1 : lcuWidth;
- for (y = 0; y < lcuHeight; y++)
- {
- signLeft = xSign(rec[startX] - tmpL[y]);
- for (x = startX; x < endX; x++)
- {
- signRight = xSign(rec[x] - rec[x + 1]);
- edgeType = signRight + signLeft + 2;
- signLeft = -signRight;
+ pixel firstPxl = 0, lastPxl = 0;
+ startX = (lpelx == 0) ? 1 : 0;
+ endX = (rpelx == picWidthTmp) ? lcuWidth-1 : lcuWidth;
+ if (lcuWidth % 16)
+ {
+ int8_t signRight;
+ for (y = 0; y < lcuHeight; y++)
+ {
+ int8_t signLeft = xSign(rec[startX] - tmpL[y]);
+ for (x = startX; x < endX; x++)
+ {
+ signRight = xSign(rec[x] - rec[x+1]);
+ edgeType = signRight + signLeft + 2;
+ signLeft = -signRight;
- rec[x] = clipTbl[rec[x] + m_offsetEo[edgeType]];
- }
+ rec[x] = Clip3(0, (1 << X265_DEPTH) - 1, rec[x] + m_offsetEo[edgeType]);
+ }
+ rec += stride;
+ }
+ }
+ else
+ {
+ for (y = 0; y < lcuHeight; y++)
+ {
+ int8_t signLeft = xSign(rec[startX] - tmpL[y]);
- rec += stride;
- }
+ if (lpelx == 0)
+ {
+ firstPxl = rec[0];
+ }
+ if (rpelx == picWidthTmp)
+ {
+ lastPxl = rec[lcuWidth - 1];
+ }
+
+ primitives.saoCuOrgE0(rec, m_offsetEo, lcuWidth, signLeft);
+
+ if (lpelx == 0)
+ {
+ rec[0] = firstPxl;
+ }
+
+ if (rpelx == picWidthTmp)
+ {
+ rec[lcuWidth - 1] = lastPxl;
+ }
+ rec += stride;
+ }
+ }
break;
}
case SAO_EO_1: // dir: |
diff -r 8189f9e9a39f -r 000f86d72337 source/Lib/TLibCommon/TComSampleAdaptiveOffset.h
--- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.h Thu Feb 27 19:05:54 2014 -0600
+++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.h Fri Feb 28 12:17:17 2014 +0530
@@ -143,11 +143,9 @@ protected:
static const int m_numCulPartsLevel[5];
static const uint32_t m_eoTable[9];
static const int m_numClass[MAX_NUM_SAO_TYPE];
-
int32_t *m_offsetBo;
int32_t *m_chromaOffsetBo;
- int m_offsetEo[LUMA_GROUP_NUM];
-
+ int8_t m_offsetEo[LUMA_GROUP_NUM];
int m_picWidth;
int m_picHeight;
uint32_t m_maxSplitLevel;
diff -r 8189f9e9a39f -r 000f86d72337 source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Thu Feb 27 19:05:54 2014 -0600
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Fri Feb 28 12:17:17 2014 +0530
@@ -131,14 +131,13 @@ void TComTrQuant::setQPforQuant(int qpy,
// To minimize the distortion only. No rate is considered.
void TComTrQuant::signBitHidingHDQ(TCoeff* qCoef, TCoeff* coef, int32_t* deltaU, const TUEntropyCodingParameters &codingParameters)
{
- const uint32_t width = codingParameters.widthInGroups << MLS_CG_LOG2_WIDTH;
- const uint32_t height = codingParameters.heightInGroups << MLS_CG_LOG2_HEIGHT;
+ const uint32_t trSize = codingParameters.widthInGroups << MLS_CG_LOG2_WIDTH;
int lastCG = -1;
int absSum = 0;
int n;
- for (int subSet = (width * height - 1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet--)
+ for (int subSet = (trSize * trSize - 1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet--)
{
int subPos = subSet << LOG2_SCAN_SET_SIZE;
int firstNZPosInCG = SCAN_SET_SIZE, lastNZPosInCG = -1;
@@ -255,29 +254,27 @@ void TComTrQuant::signBitHidingHDQ(TCoef
} // TU loop
}
-uint32_t TComTrQuant::xQuant(TComDataCU* cu, int32_t* coef, TCoeff* qCoef, int width, int height,
+uint32_t TComTrQuant::xQuant(TComDataCU* cu, int32_t* coef, TCoeff* qCoef, int trSize,
TextType ttype, uint32_t absPartIdx, int32_t *lastPos, bool curUseRDOQ)
{
uint32_t acSum = 0;
int add = 0;
bool useRDOQ = (cu->getTransformSkip(absPartIdx, ttype) ? m_useRDOQTS : m_useRDOQ) && curUseRDOQ;
- assert(width == height);
-
#if _MSC_VER
#pragma warning(disable: 4127) // conditional expression is constant
#endif
if (useRDOQ && (ttype == TEXT_LUMA || RDOQ_CHROMA))
{
- acSum = xRateDistOptQuant(cu, coef, qCoef, width, height, ttype, absPartIdx, lastPos);
+ acSum = xRateDistOptQuant(cu, coef, qCoef, trSize, ttype, absPartIdx, lastPos);
}
else
{
TUEntropyCodingParameters codingParameters;
- getTUEntropyCodingParameters(cu, codingParameters, absPartIdx, width, height, ttype);
+ getTUEntropyCodingParameters(cu, codingParameters, absPartIdx, trSize, trSize, ttype);
int deltaU[32 * 32];
- uint32_t log2TrSize = g_convertToBit[width] + 2;
+ const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
int scalingListType = (cu->isIntra(absPartIdx) ? 0 : 3) + ttype;
assert(scalingListType < 6);
int32_t *quantCoeff = 0;
@@ -288,7 +285,7 @@ uint32_t TComTrQuant::xQuant(TComDataCU*
int qbits = QUANT_SHIFT + m_qpParam.m_per + transformShift;
add = (cu->getSlice()->getSliceType() == I_SLICE ? 171 : 85) << (qbits - 9);
- int numCoeff = width * height;
+ int numCoeff = trSize * trSize;
acSum += primitives.quant(coef, quantCoeff, deltaU, qCoef, qbits, add, numCoeff, lastPos);
if (cu->getSlice()->getPPS()->getSignHideFlag() && acSum >= 2)
@@ -311,8 +308,7 @@ uint32_t TComTrQuant::transformNxN(TComD
int16_t* residual,
uint32_t stride,
TCoeff* coeff,
- uint32_t width,
- uint32_t height,
+ uint32_t trSize,
TextType ttype,
uint32_t absPartIdx,
int32_t* lastPos,
@@ -322,11 +318,11 @@ uint32_t TComTrQuant::transformNxN(TComD
if (cu->getCUTransquantBypass(absPartIdx))
{
uint32_t absSum = 0;
- for (uint32_t k = 0; k < height; k++)
+ for (uint32_t k = 0; k < trSize; k++)
{
- for (uint32_t j = 0; j < width; j++)
+ for (uint32_t j = 0; j < trSize; j++)
{
- coeff[k * width + j] = ((int16_t)residual[k * stride + j]);
+ coeff[k * trSize + j] = ((int16_t)residual[k * stride + j]);
absSum += abs(residual[k * stride + j]);
}
}
@@ -344,29 +340,29 @@ uint32_t TComTrQuant::transformNxN(TComD
mode = REG_DCT;
}
- assert((cu->getSlice()->getSPS()->getMaxTrSize() >= width));
+ assert((cu->getSlice()->getSPS()->getMaxTrSize() >= trSize));
if (useTransformSkip)
{
- xTransformSkip(residual, stride, m_tmpCoeff, width, height);
+ xTransformSkip(residual, stride, m_tmpCoeff, trSize);
}
else
{
// TODO: this may need larger data types for X265_DEPTH > 8
- const uint32_t log2BlockSize = g_convertToBit[width];
- primitives.dct[DCT_4x4 + log2BlockSize - ((width == 4) && (mode != REG_DCT))](residual, m_tmpCoeff, stride);
+ const uint32_t log2BlockSize = g_convertToBit[trSize];
+ primitives.dct[DCT_4x4 + log2BlockSize - ((trSize == 4) && (mode != REG_DCT))](residual, m_tmpCoeff, stride);
}
- return xQuant(cu, m_tmpCoeff, coeff, width, height, ttype, absPartIdx, lastPos, curUseRDOQ);
+ return xQuant(cu, m_tmpCoeff, coeff, trSize, ttype, absPartIdx, lastPos, curUseRDOQ);
}
-void TComTrQuant::invtransformNxN(bool transQuantBypass, uint32_t mode, int16_t* residual, uint32_t stride, TCoeff* coeff, uint32_t width, uint32_t height, int scalingListType, bool useTransformSkip, int lastPos)
+void TComTrQuant::invtransformNxN(bool transQuantBypass, uint32_t mode, int16_t* residual, uint32_t stride, TCoeff* coeff, uint32_t trSize, int scalingListType, bool useTransformSkip, int lastPos)
{
if (transQuantBypass)
{
- for (uint32_t k = 0; k < height; k++)
+ for (uint32_t k = 0; k < trSize; k++)
{
- for (uint32_t j = 0; j < width; j++)
+ for (uint32_t j = 0; j < trSize; j++)
{
- residual[k * stride + j] = (int16_t)(coeff[k * width + j]);
+ residual[k * stride + j] = (int16_t)(coeff[k * trSize + j]);
}
}
@@ -377,7 +373,7 @@ void TComTrQuant::invtransformNxN(bool t
int per = m_qpParam.m_per;
int rem = m_qpParam.m_rem;
bool useScalingList = getUseScalingList();
- uint32_t log2TrSize = g_convertToBit[width] + 2;
+ const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
int32_t *dequantCoef = getDequantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2);
@@ -386,30 +382,30 @@ void TComTrQuant::invtransformNxN(bool t
{
static const int invQuantScales[6] = { 40, 45, 51, 57, 64, 72 };
int scale = invQuantScales[rem] << per;
- primitives.dequant_normal(coeff, m_tmpCoeff, width * height, scale, shift);
+ primitives.dequant_normal(coeff, m_tmpCoeff, trSize * trSize, scale, shift);
}
else
{
// CHECK_ME: the code is not verify since this is DEAD path
- primitives.dequant_scaling(coeff, dequantCoef, m_tmpCoeff, width * height, per, shift);
+ primitives.dequant_scaling(coeff, dequantCoef, m_tmpCoeff, trSize * trSize, per, shift);
}
if (useTransformSkip == true)
{
- xITransformSkip(m_tmpCoeff, residual, stride, width, height);
+ xITransformSkip(m_tmpCoeff, residual, stride, trSize);
}
else
{
// CHECK_ME: we can't here when no any coeff
assert(lastPos >= 0);
- const uint32_t log2BlockSize = g_convertToBit[width];
+ const uint32_t log2BlockSize = log2TrSize - 2;
#if HIGH_BIT_DEPTH
lastPos = !lastPos; // prevent warning
#else
// DC only
- if (lastPos == 0 && !((width == 4) && (mode != REG_DCT)))
+ if (lastPos == 0 && !((trSize == 4) && (mode != REG_DCT)))
{
int dc_val = (((m_tmpCoeff[0] * 64 + 64) >> 7) * 64 + 2048) >> 12;
primitives.blockfill_s[log2BlockSize](residual, stride, dc_val);
@@ -419,7 +415,7 @@ void TComTrQuant::invtransformNxN(bool t
More information about the x265-commits
mailing list