[x265] [PATCH] rc: add support for qg-size 8
Gopi Satykrishna Akisetty
gopi.satykrishna at multicorewareinc.com
Tue Aug 23 07:44:25 CEST 2016
# HG changeset patch
# User Gopi Satykrishna Akisetty <gopi.satykrishna at multicorewareinc.com>
# Date 1471411031 -19800
# Wed Aug 17 10:47:11 2016 +0530
# Node ID 74d189cfdc36c061ce6951533d47cb7404b327b1
# Parent 49a0d1176aef5bc6330fcfd39b4589616c174f0a
rc: add support for qg-size 8
diff -r 49a0d1176aef -r 74d189cfdc36 source/common/common.h
--- a/source/common/common.h Wed Jul 27 21:47:20 2016 +0200
+++ b/source/common/common.h Wed Aug 17 10:47:11 2016 +0530
@@ -81,6 +81,7 @@
#elif defined(_MSC_VER)
+#define ALIGN_VAR_4(T, var) __declspec(align(4)) T var
#define ALIGN_VAR_8(T, var) __declspec(align(8)) T var
#define ALIGN_VAR_16(T, var) __declspec(align(16)) T var
#define ALIGN_VAR_32(T, var) __declspec(align(32)) T var
diff -r 49a0d1176aef -r 74d189cfdc36 source/common/frame.cpp
--- a/source/common/frame.cpp Wed Jul 27 21:47:20 2016 +0200
+++ b/source/common/frame.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -62,7 +62,7 @@
if (quantOffsets)
{
- int32_t cuCount = m_lowres.maxBlocksInRow * m_lowres.maxBlocksInCol;
+ int32_t cuCount = m_lowres.maxBlocksInRowFullRes * m_lowres.maxBlocksInColFullRes;
m_quantOffsets = new float[cuCount];
}
return true;
diff -r 49a0d1176aef -r 74d189cfdc36 source/common/lowres.cpp
--- a/source/common/lowres.cpp Wed Jul 27 21:47:20 2016 +0200
+++ b/source/common/lowres.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -38,7 +38,10 @@
lumaStride += 32 - (lumaStride & 31);
maxBlocksInRow = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
maxBlocksInCol = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
+ maxBlocksInRowFullRes = maxBlocksInRow * 2;
+ maxBlocksInColFullRes = maxBlocksInCol * 2;
int cuCount = maxBlocksInRow * maxBlocksInCol;
+ int cuCountFullRes = maxBlocksInRowFullRes * maxBlocksInColFullRes;
/* rounding the width to multiple of lowres CU size */
width = maxBlocksInRow * X265_LOWRES_CU_SIZE;
@@ -49,10 +52,10 @@
if (bAQEnabled)
{
- CHECKED_MALLOC(qpAqOffset, double, cuCount);
- CHECKED_MALLOC(invQscaleFactor, int, cuCount);
- CHECKED_MALLOC(qpCuTreeOffset, double, cuCount);
- CHECKED_MALLOC(blockVariance, uint32_t, cuCount);
+ CHECKED_MALLOC(qpAqOffset, double, cuCountFullRes);
+ CHECKED_MALLOC(invQscaleFactor, int, cuCountFullRes);
+ CHECKED_MALLOC(qpCuTreeOffset, double, cuCountFullRes);
+ CHECKED_MALLOC(blockVariance, uint32_t, cuCountFullRes);
}
CHECKED_MALLOC(propagateCost, uint16_t, cuCount);
diff -r 49a0d1176aef -r 74d189cfdc36 source/common/lowres.h
--- a/source/common/lowres.h Wed Jul 27 21:47:20 2016 +0200
+++ b/source/common/lowres.h Wed Aug 17 10:47:11 2016 +0530
@@ -132,6 +132,8 @@
MV* lowresMvs[2][X265_BFRAME_MAX + 1];
uint32_t maxBlocksInRow;
uint32_t maxBlocksInCol;
+ uint32_t maxBlocksInRowFullRes;
+ uint32_t maxBlocksInColFullRes;
/* used for vbvLookahead */
int plannedType[X265_LOOKAHEAD_MAX + 1];
diff -r 49a0d1176aef -r 74d189cfdc36 source/common/pixel.cpp
--- a/source/common/pixel.cpp Wed Jul 27 21:47:20 2016 +0200
+++ b/source/common/pixel.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -853,7 +853,8 @@
{
int intraCost = intraCosts[i];
int interCost = X265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);
- double propagateIntra = intraCost * invQscales[i]; // Q16 x Q8.8 = Q24.8
+ int invQscaleFactor = (invQscales[i * 2] + invQscales[i * 2 + 1] + invQscales[i * 2 + len * 2] + invQscales[i * 2 + len * 2 + 1])/4;
+ double propagateIntra = intraCost * invQscaleFactor; // Q16 x Q8.8 = Q24.8
double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0
double propagateNum = (double)(intraCost - interCost); // Q32 - Q32 = Q33.0
diff -r 49a0d1176aef -r 74d189cfdc36 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Jul 27 21:47:20 2016 +0200
+++ b/source/common/x86/asm-primitives.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -2356,7 +2356,7 @@
ALL_CHROMA_444_PU(p2s, filterPixelToShort, sse2);
ALL_LUMA_PU(convert_p2s, filterPixelToShort, sse2);
ALL_LUMA_TU(count_nonzero, count_nonzero, sse2);
- p.propagateCost = PFX(mbtree_propagate_cost_sse2);
+ //p.propagateCost = PFX(mbtree_propagate_cost_sse2);
}
if (cpuMask & X265_CPU_SSE3)
{
@@ -2670,7 +2670,7 @@
p.pu[LUMA_48x64].copy_pp = PFX(blockcopy_pp_48x64_avx);
p.frameInitLowres = PFX(frame_init_lowres_core_avx);
- p.propagateCost = PFX(mbtree_propagate_cost_avx);
+ //p.propagateCost = PFX(mbtree_propagate_cost_avx);
}
if (cpuMask & X265_CPU_XOP)
{
@@ -3666,7 +3666,7 @@
p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vpp = PFX(interp_4tap_vert_pp_64x16_avx2);
p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
- p.propagateCost = PFX(mbtree_propagate_cost_avx2);
+ //p.propagateCost = PFX(mbtree_propagate_cost_avx2);
p.saoCuStatsE0 = PFX(saoCuStatsE0_avx2);
p.saoCuStatsE1 = PFX(saoCuStatsE1_avx2);
p.saoCuStatsE2 = PFX(saoCuStatsE2_avx2);
diff -r 49a0d1176aef -r 74d189cfdc36 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Wed Jul 27 21:47:20 2016 +0200
+++ b/source/encoder/analysis.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -2647,17 +2647,17 @@
uint32_t height = m_frame->m_fencPic->m_picHeight;
uint32_t block_x = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
uint32_t block_y = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
- uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) / 16;
+ uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (8 - 1)) / 8;
uint32_t blockSize = g_maxCUSize >> cuGeom.depth;
double qp_offset = 0;
uint32_t cnt = 0;
uint32_t idx;
- for (uint32_t block_yy = block_y; block_yy < block_y + blockSize && block_yy < height; block_yy += 16)
+ for (uint32_t block_yy = block_y; block_yy < block_y + blockSize && block_yy < height; block_yy += 8)
{
- for (uint32_t block_xx = block_x; block_xx < block_x + blockSize && block_xx < width; block_xx += 16)
+ for (uint32_t block_xx = block_x; block_xx < block_x + blockSize && block_xx < width; block_xx += 8)
{
- idx = ((block_yy / 16) * (maxCols)) + (block_xx / 16);
+ idx = ((block_yy / 8) * (maxCols)) + (block_xx / 8);
qp_offset += qpoffs[idx];
cnt++;
}
diff -r 49a0d1176aef -r 74d189cfdc36 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Wed Jul 27 21:47:20 2016 +0200
+++ b/source/encoder/encoder.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -605,7 +605,7 @@
if (pic_in->quantOffsets != NULL)
{
- int cuCount = inFrame->m_lowres.maxBlocksInRow * inFrame->m_lowres.maxBlocksInCol;
+ int cuCount = inFrame->m_lowres.maxBlocksInRowFullRes * inFrame->m_lowres.maxBlocksInColFullRes;
memcpy(inFrame->m_quantOffsets, pic_in->quantOffsets, cuCount * sizeof(float));
}
@@ -790,7 +790,7 @@
if (m_rateControl->writeRateControlFrameStats(outFrame, &curEncoder->m_rce))
m_aborted = true;
if (pic_out)
- {
+ {
/* m_rcData is allocated for every frame */
pic_out->rcData = outFrame->m_rcData;
outFrame->m_rcData->qpaRc = outFrame->m_encData->m_avgQpRc;
@@ -1583,7 +1583,7 @@
{
pps->bUseDQP = true;
pps->maxCuDQPDepth = g_log2Size[m_param->maxCUSize] - g_log2Size[m_param->rc.qgSize];
- X265_CHECK(pps->maxCuDQPDepth <= 2, "max CU DQP depth cannot be greater than 2\n");
+ X265_CHECK(pps->maxCuDQPDepth <= 3, "max CU DQP depth cannot be greater than 3\n");
}
else
{
@@ -1867,10 +1867,10 @@
bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
if (!m_param->bLossless && (m_param->rc.aqMode || bIsVbv))
{
- if (p->rc.qgSize < X265_MAX(16, p->minCUSize))
+ if (p->rc.qgSize < X265_MAX(8, p->minCUSize))
{
- p->rc.qgSize = X265_MAX(16, p->minCUSize);
- x265_log(p, X265_LOG_WARNING, "QGSize should be greater than or equal to 16 and minCUSize, setting QGSize = %d\n", p->rc.qgSize);
+ p->rc.qgSize = X265_MAX(8, p->minCUSize);
+ x265_log(p, X265_LOG_WARNING, "QGSize should be greater than or equal to 8 and minCUSize, setting QGSize = %d\n", p->rc.qgSize);
}
if (p->rc.qgSize > p->maxCUSize)
{
diff -r 49a0d1176aef -r 74d189cfdc36 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Wed Jul 27 21:47:20 2016 +0200
+++ b/source/encoder/ratecontrol.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -615,9 +615,9 @@
}
if (m_param->rc.cuTree)
{
- m_cuTreeStats.qpBuffer[0] = X265_MALLOC(uint16_t, m_ncu * sizeof(uint16_t));
+ m_cuTreeStats.qpBuffer[0] = X265_MALLOC(uint16_t, m_ncu * 4 * sizeof(uint16_t));
if (m_param->bBPyramid && m_param->rc.bStatRead)
- m_cuTreeStats.qpBuffer[1] = X265_MALLOC(uint16_t, m_ncu * sizeof(uint16_t));
+ m_cuTreeStats.qpBuffer[1] = X265_MALLOC(uint16_t, m_ncu * 4 * sizeof(uint16_t));
m_cuTreeStats.qpBufPos = -1;
}
}
@@ -1437,7 +1437,7 @@
if (!fread(&type, 1, 1, m_cutreeStatFileIn))
goto fail;
- if (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], sizeof(uint16_t), m_ncu, m_cutreeStatFileIn) != (size_t)m_ncu)
+ if (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], sizeof(uint16_t), m_ncu * 4, m_cutreeStatFileIn) != (size_t)m_ncu * 4)
goto fail;
if (type != sliceTypeActual && m_cuTreeStats.qpBufPos == 1)
@@ -1448,8 +1448,8 @@
}
while(type != sliceTypeActual);
}
- primitives.fix8Unpack(frame->m_lowres.qpCuTreeOffset, m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], m_ncu);
- for (int i = 0; i < m_ncu; i++)
+ primitives.fix8Unpack(frame->m_lowres.qpCuTreeOffset, m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], m_ncu * 4);
+ for (int i = 0; i < m_ncu * 4; i++)
frame->m_lowres.invQscaleFactor[i] = x265_exp2fix8(frame->m_lowres.qpCuTreeOffset[i]);
m_cuTreeStats.qpBufPos--;
}
@@ -2612,10 +2612,10 @@
if (m_param->rc.cuTree && IS_REFERENCED(curFrame) && !m_param->rc.bStatRead)
{
uint8_t sliceType = (uint8_t)rce->sliceType;
- primitives.fix8Pack(m_cuTreeStats.qpBuffer[0], curFrame->m_lowres.qpCuTreeOffset, m_ncu);
+ primitives.fix8Pack(m_cuTreeStats.qpBuffer[0], curFrame->m_lowres.qpCuTreeOffset, m_ncu * 4);
if (fwrite(&sliceType, 1, 1, m_cutreeStatFileOut) < 1)
goto writeFailure;
- if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), m_ncu, m_cutreeStatFileOut) < (size_t)m_ncu)
+ if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), m_ncu * 4, m_cutreeStatFileOut) < (size_t)m_ncu * 4)
goto writeFailure;
}
return 0;
diff -r 49a0d1176aef -r 74d189cfdc36 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Wed Jul 27 21:47:20 2016 +0200
+++ b/source/encoder/slicetype.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -60,12 +60,12 @@
{
if ((colorFormat != X265_CSP_I444) && plane)
{
- ALIGN_VAR_8(pixel, pix[8 * 8]);
- primitives.cu[BLOCK_8x8].copy_pp(pix, 8, src, srcStride);
- return acEnergyVar(curFrame, primitives.cu[BLOCK_8x8].var(pix, 8), 6, plane);
+ ALIGN_VAR_4(pixel, pix[4 * 4]);
+ primitives.cu[BLOCK_4x4].copy_pp(pix, 4, src, srcStride);
+ return acEnergyVar(curFrame, primitives.cu[BLOCK_4x4].var(pix, 4), 4, plane);
}
else
- return acEnergyVar(curFrame, primitives.cu[BLOCK_16x16].var(src, srcStride), 8, plane);
+ return acEnergyVar(curFrame, primitives.cu[BLOCK_8x8].var(src, srcStride), 6, plane);
}
} // end anonymous namespace
@@ -97,7 +97,7 @@
/* Actual adaptive quantization */
int maxCol = curFrame->m_fencPic->m_picWidth;
int maxRow = curFrame->m_fencPic->m_picHeight;
- int blockCount = curFrame->m_lowres.maxBlocksInRow * curFrame->m_lowres.maxBlocksInCol;
+ int blockCount = curFrame->m_lowres.maxBlocksInRowFullRes * curFrame->m_lowres.maxBlocksInColFullRes;
float* quantOffsets = curFrame->m_quantOffsets;
for (int y = 0; y < 3; y++)
@@ -113,7 +113,7 @@
if (param->rc.aqMode == X265_AQ_NONE || param->rc.aqStrength == 0)
{
/* Need to init it anyways for CU tree */
- int cuCount = widthInCU * heightInCU;
+ int cuCount = blockCount;
if (param->rc.aqMode && param->rc.aqStrength == 0)
{
@@ -137,8 +137,8 @@
/* Need variance data for weighted prediction */
if (param->bEnableWeightedPred || param->bEnableWeightedBiPred)
{
- for (blockY = 0; blockY < maxRow; blockY += 16)
- for (blockX = 0; blockX < maxCol; blockX += 16)
+ for (blockY = 0; blockY < maxRow; blockY += 8)
+ for (blockX = 0; blockX < maxCol; blockX += 8)
acEnergyCu(curFrame, blockX, blockY, param->internalCsp);
}
}
@@ -152,10 +152,10 @@
double bit_depth_correction = 1.f / (1 << (2*(X265_DEPTH-8)));
curFrame->m_lowres.frameVariance = 0;
uint64_t rowVariance = 0;
- for (blockY = 0; blockY < maxRow; blockY += 16)
+ for (blockY = 0; blockY < maxRow; blockY += 8)
{
rowVariance = 0;
- for (blockX = 0; blockX < maxCol; blockX += 16)
+ for (blockX = 0; blockX < maxCol; blockX += 8)
{
uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp);
curFrame->m_lowres.blockVariance[blockXY] = energy;
@@ -172,21 +172,21 @@
avg_adj /= blockCount;
avg_adj_pow2 /= blockCount;
strength = param->rc.aqStrength * avg_adj;
- avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (11.f)) / avg_adj;
+ avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (8.f)) / avg_adj;
bias_strength = param->rc.aqStrength;
}
else
strength = param->rc.aqStrength * 1.0397f;
blockXY = 0;
- for (blockY = 0; blockY < maxRow; blockY += 16)
+ for (blockY = 0; blockY < maxRow; blockY += 8)
{
- for (blockX = 0; blockX < maxCol; blockX += 16)
+ for (blockX = 0; blockX < maxCol; blockX += 8)
{
if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
{
qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
- qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - 11.f / (qp_adj * qp_adj));
+ qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - 8.f / (qp_adj * qp_adj));
}
else if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
{
@@ -196,7 +196,7 @@
else
{
uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp);
- qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (14.427f + 2 * (X265_DEPTH - 8)));
+ qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (11.427f + 2 * (X265_DEPTH - 8)));
}
if (quantOffsets != NULL)
qp_adj += quantOffsets[blockXY];
@@ -318,8 +318,11 @@
/* do not include edge blocks in the frame cost estimates, they are not very accurate */
const bool bFrameScoreCU = (cuX > 0 && cuX < widthInCU - 1 &&
cuY > 0 && cuY < heightInCU - 1) || widthInCU <= 2 || heightInCU <= 2;
-
- int icostAq = (bFrameScoreCU && fenc.invQscaleFactor) ? ((icost * fenc.invQscaleFactor[cuXY] + 128) >> 8) : icost;
+ int invQscaleFactor = (fenc.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4] +
+ fenc.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + 1] +
+ fenc.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + fenc.maxBlocksInRowFullRes] +
+ fenc.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + fenc.maxBlocksInRowFullRes + 1]) / 4;
+ int icostAq = (bFrameScoreCU && fenc.invQscaleFactor) ? ((icost * invQscaleFactor + 128) >> 8) : icost;
if (bFrameScoreCU)
{
@@ -812,9 +815,13 @@
uint16_t lowresCuCost = curFrame->m_lowres.lowresCostForRc[lowresCuIdx] & LOWRES_COST_MASK;
if (qp_offset)
{
- lowresCuCost = (uint16_t)((lowresCuCost * x265_exp2fix8(qp_offset[lowresCuIdx]) + 128) >> 8);
+ double qpOffset = (qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4] +
+ qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + 1] +
+ qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes] +
+ qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes + 1]) / 4;
+ lowresCuCost = (uint16_t)((lowresCuCost * x265_exp2fix8(qpOffset) + 128) >> 8);
int32_t intraCuCost = curFrame->m_lowres.intraCost[lowresCuIdx];
- curFrame->m_lowres.intraCost[lowresCuIdx] = (intraCuCost * x265_exp2fix8(qp_offset[lowresCuIdx]) + 128) >> 8;
+ curFrame->m_lowres.intraCost[lowresCuIdx] = (intraCuCost * x265_exp2fix8(qpOffset) + 128) >> 8;
}
if (m_param->bIntraRefresh && slice->m_sliceType == X265_TYPE_P)
for (uint32_t x = curFrame->m_encData->m_pir.pirStartCol; x <= curFrame->m_encData->m_pir.pirEndCol; x++)
@@ -1669,7 +1676,7 @@
if (bIntra)
{
memset(frames[0]->propagateCost, 0, m_cuCount * sizeof(uint16_t));
- memcpy(frames[0]->qpCuTreeOffset, frames[0]->qpAqOffset, m_cuCount * sizeof(double));
+ memcpy(frames[0]->qpCuTreeOffset, frames[0]->qpAqOffset, m_cuCount * 4 * sizeof(double));
return;
}
std::swap(frames[lastnonb]->propagateCost, frames[0]->propagateCost);
@@ -1766,7 +1773,7 @@
int cuIndex = blocky * strideInCU;
primitives.propagateCost(m_scratch, propagateCost,
frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
- frames[b]->invQscaleFactor + cuIndex, &fpsFactor, m_8x8Width);
+ frames[b]->invQscaleFactor + (cuIndex * 4), &fpsFactor, m_8x8Width);
if (referenced)
propagateCost += m_8x8Width;
@@ -1852,14 +1859,27 @@
if (ref0Distance && frame->weightedCostDelta[ref0Distance - 1] > 0)
weightdelta = (1.0 - frame->weightedCostDelta[ref0Distance - 1]);
- for (int cuIndex = 0; cuIndex < m_cuCount; cuIndex++)
+ for (int cuY = 0; cuY < m_8x8Height; cuY++)
{
- int intracost = (frame->intraCost[cuIndex] * frame->invQscaleFactor[cuIndex] + 128) >> 8;
- if (intracost)
+ for (int cuX = 0; cuX < m_8x8Width; cuX++)
{
- int propagateCost = (frame->propagateCost[cuIndex] * fpsFactor + 128) >> 8;
- double log2_ratio = X265_LOG2(intracost + propagateCost) - X265_LOG2(intracost) + weightdelta;
- frame->qpCuTreeOffset[cuIndex] = frame->qpAqOffset[cuIndex] - m_cuTreeStrength * log2_ratio;
+ const int cuXY = cuX + cuY * m_8x8Width;
+ int invQscaleFactor = (frame->invQscaleFactor[cuX * 2 + cuY * m_8x8Width * 4] +
+ frame->invQscaleFactor[cuX * 2 + cuY * m_8x8Width * 4 + 1] +
+ frame->invQscaleFactor[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes] +
+ frame->invQscaleFactor[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes + 1]) / 4;
+
+ int intracost = (frame->intraCost[cuXY] * invQscaleFactor + 128) >> 8;
+ if (intracost)
+ {
+ int propagateCost = (frame->propagateCost[cuXY] * fpsFactor + 128) >> 8;
+ double log2_ratio = X265_LOG2(intracost + propagateCost) - X265_LOG2(intracost) + weightdelta;
+ frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4] - m_cuTreeStrength * (log2_ratio);
+ frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4 + 1] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4 + 1] - m_cuTreeStrength * (log2_ratio);
+ frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes] - m_cuTreeStrength * (log2_ratio);
+ frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes + 1] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes + 1] - m_cuTreeStrength * (log2_ratio);
+ }
+
}
}
}
@@ -1883,7 +1903,10 @@
{
int cuxy = cux + cuy * m_8x8Width;
int cuCost = frames[b]->lowresCosts[b - p0][p1 - b][cuxy] & LOWRES_COST_MASK;
- double qp_adj = qp_offset[cuxy];
+ double qp_adj = (qp_offset[cux * 2 + cuy * m_8x8Width * 4] +
+ qp_offset[cux * 2 + cuy * m_8x8Width * 4 + 1] +
+ qp_offset[cux * 2 + cuy * m_8x8Width * 4 + frames[b]->maxBlocksInRowFullRes] +
+ qp_offset[cux * 2 + cuy * m_8x8Width * 4 + frames[b]->maxBlocksInRowFullRes + 1]) / 4;
cuCost = (cuCost * x265_exp2fix8(qp_adj) + 128) >> 8;
rowSatd[cuy] += cuCost;
if ((cuy > 0 && cuy < m_8x8Height - 1 &&
@@ -2202,8 +2225,12 @@
/* do not include edge blocks in the frame cost estimates, they are not very accurate */
const bool bFrameScoreCU = (cuX > 0 && cuX < widthInCU - 1 &&
cuY > 0 && cuY < heightInCU - 1) || widthInCU <= 2 || heightInCU <= 2;
+ int invQscaleFactor = (fenc->invQscaleFactor[cuX * 2 + cuY * widthInCU * 4] +
+ fenc->invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + 1] +
+ fenc->invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + fenc->maxBlocksInRowFullRes] +
+ fenc->invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + fenc->maxBlocksInRowFullRes + 1]) / 4;
- int bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * fenc->invQscaleFactor[cuXY] + 128) >> 8) : bcost;
+ int bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * invQscaleFactor + 128) >> 8) : bcost;
if (bFrameScoreCU)
{
More information about the x265-devel mailing list