[x265] [PATCH] slicetype: change the value of total 16x16 blocks for averaging of qp_adj
sreelakshmy at multicorewareinc.com
sreelakshmy at multicorewareinc.com
Mon Mar 9 08:16:12 CET 2015
# HG changeset patch
# User Sreelakshmy V G <sreelakshmy at multicorewareinc.com>
# Date 1425884893 -19800
# Mon Mar 09 12:38:13 2015 +0530
# Node ID ef90273acbaffbf390e6a947583d9141cdf1d3e6
# Parent 043c2418864b0a3ada6f597e6def6ead73d90b5f
slicetype: change the value of total 16x16 blocks for averaging of qp_adj
m_ncu in ratecontrol signifies the actual number of 16x16 blocks, whereas ncu in
slicetype leaves out the border blocks, so the two values differ. Average qp_adj
over the actual block count (including the border blocks) instead.
diff -r 043c2418864b -r ef90273acbaf source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Fri Mar 06 13:15:55 2015 -0600
+++ b/source/encoder/slicetype.cpp Mon Mar 09 12:38:13 2015 +0530
@@ -106,6 +106,10 @@
int maxCol = curFrame->m_fencPic->m_picWidth;
int maxRow = curFrame->m_fencPic->m_picHeight;
+ int cuWidth = ((param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
+ int cuHeight = ((param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
+ int m_ncu = cuWidth * cuHeight;
+
for (int y = 0; y < 3; y++)
{
curFrame->m_lowres.wp_ssd[y] = 0;
@@ -157,8 +161,8 @@
}
}
- avg_adj /= ncu;
- avg_adj_pow2 /= ncu;
+ avg_adj /= m_ncu;
+ avg_adj_pow2 /= m_ncu;
strength = param->rc.aqStrength * avg_adj / bit_depth_correction;
avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (11.f * bit_depth_correction)) / avg_adj;
}
@@ -476,9 +480,9 @@
m_outputSignalRequired = false;
m_isActive = true;
- m_heightInCU = ((m_param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
- m_widthInCU = ((m_param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
- m_ncu = m_widthInCU > 2 && m_heightInCU > 2 ? (m_widthInCU - 2) * (m_heightInCU - 2) : m_widthInCU * m_heightInCU;
+ m_8x8Height = ((m_param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
+ m_8x8Width = ((m_param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
+ m_8x8Blocks = m_8x8Width > 2 && m_8x8Height > 2 ? (m_8x8Width - 2) * (m_8x8Height - 2) : m_8x8Width * m_8x8Height;
m_lastKeyframe = -m_param->keyframeMax;
memset(m_preframes, 0, sizeof(m_preframes));
@@ -505,14 +509,14 @@
if (m_bBatchMotionSearch && m_pool->m_numWorkers > 12)
{
- m_numRowsPerSlice = m_heightInCU / (m_pool->m_numWorkers - 1); // default to numWorkers - 1 slices
+ m_numRowsPerSlice = m_8x8Height / (m_pool->m_numWorkers - 1); // default to numWorkers - 1 slices
m_numRowsPerSlice = X265_MAX(m_numRowsPerSlice, 10); // at least 10 rows per slice
- m_numRowsPerSlice = X265_MIN(m_numRowsPerSlice, m_heightInCU); // but no more than the full picture
- m_numCoopSlices = m_heightInCU / m_numRowsPerSlice;
+ m_numRowsPerSlice = X265_MIN(m_numRowsPerSlice, m_8x8Height); // but no more than the full picture
+ m_numCoopSlices = m_8x8Height / m_numRowsPerSlice;
}
else
{
- m_numRowsPerSlice = m_heightInCU;
+ m_numRowsPerSlice = m_8x8Height;
m_numCoopSlices = 1;
}
@@ -547,7 +551,7 @@
int numTLD = 1 + (m_pool ? m_pool->m_numWorkers : 0);
m_tld = new LookaheadTLD[numTLD];
for (int i = 0; i < numTLD; i++)
- m_tld[i].init(m_widthInCU, m_heightInCU, m_ncu);
+ m_tld[i].init(m_8x8Width, m_8x8Height, m_8x8Blocks);
m_scratch = X265_MALLOC(int, m_tld[0].widthInCU);
return m_tld && m_scratch;
@@ -799,7 +803,7 @@
uint32_t lowresRow = 0, lowresCol = 0, lowresCuIdx = 0, sum = 0;
uint32_t scale = m_param->maxCUSize / (2 * X265_LOWRES_CU_SIZE);
uint32_t numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
- uint32_t widthInLowresCu = (uint32_t)m_widthInCU, heightInLowresCu = (uint32_t)m_heightInCU;
+ uint32_t widthInLowresCu = (uint32_t)m_8x8Width, heightInLowresCu = (uint32_t)m_8x8Height;
double *qp_offset = 0;
/* Factor in qpoffsets based on Aq/Cutree in CU costs */
if (m_param->rc.aqMode)
@@ -1153,7 +1157,7 @@
{
int numFrames, origNumFrames, keyintLimit, framecnt;
int maxSearch = X265_MIN(m_param->lookaheadDepth, X265_LOOKAHEAD_MAX);
- int cuCount = m_ncu;
+ int cuCount = m_8x8Blocks;
int resetStart;
bool bIsVbvLookahead = m_param->rc.vbvBufferSize && m_param->lookaheadDepth;
@@ -1433,7 +1437,7 @@
if (res && bRealScenecut)
{
int imb = frame->intraMbs[p1 - p0];
- int pmb = m_ncu - imb;
+ int pmb = m_8x8Blocks - imb;
x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d Icost:%d Pcost:%d ratio:%.4f bias:%.4f gop:%d (imb:%d pmb:%d)\n",
frame->frameNum, icost, pcost, 1. - (double)pcost / icost, bias, gopSize, imb, pmb);
}
@@ -1530,7 +1534,7 @@
double averageDuration = totalDuration / (numframes + 1);
int i = numframes;
- int cuCount = m_widthInCU * m_heightInCU;
+ int cuCount = m_8x8Width * m_8x8Height;
while (i > 0 && frames[i]->sliceType == X265_TYPE_B)
i--;
@@ -1625,7 +1629,7 @@
int32_t bipredWeights[2] = { bipredWeight, 64 - bipredWeight };
int listDist[2] = { b - p0 - 1, p1 - b - 1 };
- memset(m_scratch, 0, m_widthInCU * sizeof(int));
+ memset(m_scratch, 0, m_8x8Width * sizeof(int));
uint16_t *propagateCost = frames[b]->propagateCost;
@@ -1634,20 +1638,20 @@
/* For non-referred frames the source costs are always zero, so just memset one row and re-use it. */
if (!referenced)
- memset(frames[b]->propagateCost, 0, m_widthInCU * sizeof(uint16_t));
+ memset(frames[b]->propagateCost, 0, m_8x8Width * sizeof(uint16_t));
- int32_t strideInCU = m_widthInCU;
- for (uint16_t blocky = 0; blocky < m_heightInCU; blocky++)
+ int32_t strideInCU = m_8x8Width;
+ for (uint16_t blocky = 0; blocky < m_8x8Height; blocky++)
{
int cuIndex = blocky * strideInCU;
primitives.propagateCost(m_scratch, propagateCost,
frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
- frames[b]->invQscaleFactor + cuIndex, &fpsFactor, m_widthInCU);
+ frames[b]->invQscaleFactor + cuIndex, &fpsFactor, m_8x8Width);
if (referenced)
- propagateCost += m_widthInCU;
+ propagateCost += m_8x8Width;
- for (uint16_t blockx = 0; blockx < m_widthInCU; blockx++, cuIndex++)
+ for (uint16_t blockx = 0; blockx < m_8x8Width; blockx++, cuIndex++)
{
int32_t propagate_amount = m_scratch[blockx];
/* Don't propagate for an intra block. */
@@ -1692,7 +1696,7 @@
/* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
* be counted. */
- if (cux < m_widthInCU - 1 && cuy < m_heightInCU - 1 && cux >= 0 && cuy >= 0)
+ if (cux < m_8x8Width - 1 && cuy < m_8x8Height - 1 && cux >= 0 && cuy >= 0)
{
CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10);
CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10);
@@ -1701,13 +1705,13 @@
}
else /* Check offsets individually */
{
- if (cux < m_widthInCU && cuy < m_heightInCU && cux >= 0 && cuy >= 0)
+ if (cux < m_8x8Width && cuy < m_8x8Height && cux >= 0 && cuy >= 0)
CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10);
- if (cux + 1 < m_widthInCU && cuy < m_heightInCU && cux + 1 >= 0 && cuy >= 0)
+ if (cux + 1 < m_8x8Width && cuy < m_8x8Height && cux + 1 >= 0 && cuy >= 0)
CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10);
- if (cux < m_widthInCU && cuy + 1 < m_heightInCU && cux >= 0 && cuy + 1 >= 0)
+ if (cux < m_8x8Width && cuy + 1 < m_8x8Height && cux >= 0 && cuy + 1 >= 0)
CLIP_ADD(refCosts[list][idx2], (listamount * idx2weight + 512) >> 10);
- if (cux + 1 < m_widthInCU && cuy + 1 < m_heightInCU && cux + 1 >= 0 && cuy + 1 >= 0)
+ if (cux + 1 < m_8x8Width && cuy + 1 < m_8x8Height && cux + 1 >= 0 && cuy + 1 >= 0)
CLIP_ADD(refCosts[list][idx3], (listamount * idx3weight + 512) >> 10);
}
}
@@ -1731,7 +1735,7 @@
/* Allow the strength to be adjusted via qcompress, since the two concepts
* are very similar. */
- int cuCount = m_widthInCU * m_heightInCU;
+ int cuCount = m_8x8Width * m_8x8Height;
double strength = 5.0 * (1.0 - m_param->rc.qCompress);
for (int cuIndex = 0; cuIndex < cuCount; cuIndex++)
@@ -1755,19 +1759,19 @@
double *qp_offset = (frames[b]->sliceType == X265_TYPE_B) ? frames[b]->qpAqOffset : frames[b]->qpCuTreeOffset;
x265_emms();
- for (int cuy = m_heightInCU - 1; cuy >= 0; cuy--)
+ for (int cuy = m_8x8Height - 1; cuy >= 0; cuy--)
{
rowSatd[cuy] = 0;
- for (int cux = m_widthInCU - 1; cux >= 0; cux--)
+ for (int cux = m_8x8Width - 1; cux >= 0; cux--)
{
- int cuxy = cux + cuy * m_widthInCU;
+ int cuxy = cux + cuy * m_8x8Width;
int cuCost = frames[b]->lowresCosts[b - p0][p1 - b][cuxy] & LOWRES_COST_MASK;
double qp_adj = qp_offset[cuxy];
cuCost = (cuCost * x265_exp2fix8(qp_adj) + 128) >> 8;
rowSatd[cuy] += cuCost;
- if ((cuy > 0 && cuy < m_heightInCU - 1 &&
- cux > 0 && cux < m_widthInCU - 1) ||
- m_widthInCU <= 2 || m_heightInCU <= 2)
+ if ((cuy > 0 && cuy < m_8x8Height - 1 &&
+ cux > 0 && cux < m_8x8Width - 1) ||
+ m_8x8Width <= 2 || m_8x8Height <= 2)
{
score += cuCost;
}
@@ -1842,14 +1846,14 @@
X265_CHECK(i < MAX_COOP_SLICES, "impossible number of coop slices\n");
int firstY = m_lookahead.m_numRowsPerSlice * i;
- int lastY = (i == m_jobTotal - 1) ? m_lookahead.m_heightInCU - 1 : m_lookahead.m_numRowsPerSlice * (i + 1) - 1;
+ int lastY = (i == m_jobTotal - 1) ? m_lookahead.m_8x8Height - 1 : m_lookahead.m_numRowsPerSlice * (i + 1) - 1;
bool lastRow = true;
for (int cuY = lastY; cuY >= firstY; cuY--)
{
m_frames[m_coop.b]->rowSatds[m_coop.b - m_coop.p0][m_coop.p1 - m_coop.b][cuY] = 0;
- for (int cuX = m_lookahead.m_widthInCU - 1; cuX >= 0; cuX--)
+ for (int cuX = m_lookahead.m_8x8Width - 1; cuX >= 0; cuX--)
estimateCUCost(tld, cuX, cuY, m_coop.p0, m_coop.p1, m_coop.b, m_coop.bDoSearch, lastRow, i);
lastRow = false;
@@ -1919,11 +1923,11 @@
else
{
bool lastRow = true;
- for (int cuY = m_lookahead.m_heightInCU - 1; cuY >= 0; cuY--)
+ for (int cuY = m_lookahead.m_8x8Height - 1; cuY >= 0; cuY--)
{
fenc->rowSatds[b - p0][p1 - b][cuY] = 0;
- for (int cuX = m_lookahead.m_widthInCU - 1; cuX >= 0; cuX--)
+ for (int cuX = m_lookahead.m_8x8Width - 1; cuX >= 0; cuX--)
estimateCUCost(tld, cuX, cuY, p0, p1, b, bDoSearch, lastRow, -1);
lastRow = false;
@@ -1953,8 +1957,8 @@
ReferencePlanes *wfref0 = tld.weightedRef.isWeighted ? &tld.weightedRef : fref0;
- const int widthInCU = m_lookahead.m_widthInCU;
- const int heightInCU = m_lookahead.m_heightInCU;
+ const int widthInCU = m_lookahead.m_8x8Width;
+ const int heightInCU = m_lookahead.m_8x8Height;
const int bBidir = (b < p1);
const int cuXY = cuX + cuY * widthInCU;
const int cuSize = X265_LOWRES_CU_SIZE;
diff -r 043c2418864b -r ef90273acbaf source/encoder/slicetype.h
--- a/source/encoder/slicetype.h Fri Mar 06 13:15:55 2015 -0600
+++ b/source/encoder/slicetype.h Mon Mar 09 12:38:13 2015 +0530
@@ -124,9 +124,9 @@
int m_histogram[X265_BFRAME_MAX + 1];
int m_lastKeyframe;
- int m_widthInCU;
- int m_heightInCU;
- int m_ncu;
+ int m_8x8Width;
+ int m_8x8Height;
+ int m_8x8Blocks;
int m_numCoopSlices;
int m_numRowsPerSlice;
bool m_filled;
More information about the x265-devel mailing list