[x265] [PATCH] rc: add support for qg-size 8
gopi.satykrishna at multicorewareinc.com
gopi.satykrishna at multicorewareinc.com
Fri Aug 26 06:49:51 CEST 2016
# HG changeset patch
# User Gopi Satykrishna Akisetty <gopi.satykrishna at multicorewareinc.com>
# Date 1471411031 -19800
# Wed Aug 17 10:47:11 2016 +0530
# Node ID ab205f07f87b6a8485732e11cbef67c10eaf9b7a
# Parent 215eedc9ecc0570baaf8189eda7b96f1df89bd22
rc: add support for qg-size 8
diff -r 215eedc9ecc0 -r ab205f07f87b doc/reST/cli.rst
--- a/doc/reST/cli.rst Wed Aug 24 13:17:45 2016 +0530
+++ b/doc/reST/cli.rst Wed Aug 17 10:47:11 2016 +0530
@@ -1328,11 +1328,11 @@
Default 1.0.
**Range of values:** 0.0 to 3.0
-.. option:: --qg-size <64|32|16>
+.. option:: --qg-size <64|32|16|8>
Enable adaptive quantization for sub-CTUs. This parameter specifies
the minimum CU size at which QP can be adjusted, ie. Quantization Group
- size. Allowed range of values are 64, 32, 16 provided this falls within
+ size. Allowed values are 64, 32, 16, and 8, provided the value falls within
the inclusive range [maxCUSize, minCUSize]. Experimental.
Default: same as maxCUSize
diff -r 215eedc9ecc0 -r ab205f07f87b source/common/common.h
--- a/source/common/common.h Wed Aug 24 13:17:45 2016 +0530
+++ b/source/common/common.h Wed Aug 17 10:47:11 2016 +0530
@@ -71,6 +71,7 @@
#define NUM_INTRA_MODE 35
#if defined(__GNUC__)
+#define ALIGN_VAR_4(T, var) T var __attribute__((aligned(4)))
#define ALIGN_VAR_8(T, var) T var __attribute__((aligned(8)))
#define ALIGN_VAR_16(T, var) T var __attribute__((aligned(16)))
#define ALIGN_VAR_32(T, var) T var __attribute__((aligned(32)))
@@ -81,6 +82,7 @@
#elif defined(_MSC_VER)
+#define ALIGN_VAR_4(T, var) __declspec(align(4)) T var
#define ALIGN_VAR_8(T, var) __declspec(align(8)) T var
#define ALIGN_VAR_16(T, var) __declspec(align(16)) T var
#define ALIGN_VAR_32(T, var) __declspec(align(32)) T var
diff -r 215eedc9ecc0 -r ab205f07f87b source/common/frame.cpp
--- a/source/common/frame.cpp Wed Aug 24 13:17:45 2016 +0530
+++ b/source/common/frame.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -54,7 +54,7 @@
CHECKED_MALLOC_ZERO(m_rcData, RcStats, 1);
if (m_fencPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp) &&
- m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode))
+ m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode, param->rc.qgSize))
{
X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");
m_numRows = (m_fencPic->m_picHeight + g_maxCUSize - 1) / g_maxCUSize;
@@ -62,7 +62,11 @@
if (quantOffsets)
{
- int32_t cuCount = m_lowres.maxBlocksInRow * m_lowres.maxBlocksInCol;
+ int32_t cuCount;
+ if (param->rc.qgSize == 8 )
+ cuCount = m_lowres.maxBlocksInRowFullRes * m_lowres.maxBlocksInColFullRes;
+ else
+ cuCount = m_lowres.maxBlocksInRow * m_lowres.maxBlocksInCol;
m_quantOffsets = new float[cuCount];
}
return true;
diff -r 215eedc9ecc0 -r ab205f07f87b source/common/lowres.cpp
--- a/source/common/lowres.cpp Wed Aug 24 13:17:45 2016 +0530
+++ b/source/common/lowres.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -27,7 +27,7 @@
using namespace X265_NS;
-bool Lowres::create(PicYuv *origPic, int _bframes, bool bAQEnabled)
+bool Lowres::create(PicYuv *origPic, int _bframes, bool bAQEnabled, uint32_t qgSize)
{
isLowres = true;
bframes = _bframes;
@@ -38,7 +38,14 @@
lumaStride += 32 - (lumaStride & 31);
maxBlocksInRow = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
maxBlocksInCol = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
+ maxBlocksInRowFullRes = maxBlocksInRow * 2;
+ maxBlocksInColFullRes = maxBlocksInCol * 2;
int cuCount = maxBlocksInRow * maxBlocksInCol;
+ int cuCountFullRes;
+ if (qgSize == 8)
+ cuCountFullRes = maxBlocksInRowFullRes * maxBlocksInColFullRes;
+ else
+ cuCountFullRes = maxBlocksInRow * maxBlocksInCol;
/* rounding the width to multiple of lowres CU size */
width = maxBlocksInRow * X265_LOWRES_CU_SIZE;
@@ -49,10 +56,10 @@
if (bAQEnabled)
{
- CHECKED_MALLOC(qpAqOffset, double, cuCount);
- CHECKED_MALLOC(invQscaleFactor, int, cuCount);
- CHECKED_MALLOC(qpCuTreeOffset, double, cuCount);
- CHECKED_MALLOC(blockVariance, uint32_t, cuCount);
+ CHECKED_MALLOC(qpAqOffset, double, cuCountFullRes);
+ CHECKED_MALLOC(invQscaleFactor, int, cuCountFullRes);
+ CHECKED_MALLOC(qpCuTreeOffset, double, cuCountFullRes);
+ CHECKED_MALLOC(blockVariance, uint32_t, cuCountFullRes);
}
CHECKED_MALLOC(propagateCost, uint16_t, cuCount);
diff -r 215eedc9ecc0 -r ab205f07f87b source/common/lowres.h
--- a/source/common/lowres.h Wed Aug 24 13:17:45 2016 +0530
+++ b/source/common/lowres.h Wed Aug 17 10:47:11 2016 +0530
@@ -132,6 +132,8 @@
MV* lowresMvs[2][X265_BFRAME_MAX + 1];
uint32_t maxBlocksInRow;
uint32_t maxBlocksInCol;
+ uint32_t maxBlocksInRowFullRes;
+ uint32_t maxBlocksInColFullRes;
/* used for vbvLookahead */
int plannedType[X265_LOOKAHEAD_MAX + 1];
@@ -153,7 +155,7 @@
double weightedCostDelta[X265_BFRAME_MAX + 2];
ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
- bool create(PicYuv *origPic, int _bframes, bool bAqEnabled);
+ bool create(PicYuv *origPic, int _bframes, bool bAqEnabled, uint32_t qgSize);
void destroy();
void init(PicYuv *origPic, int poc);
};
diff -r 215eedc9ecc0 -r ab205f07f87b source/common/pixel.cpp
--- a/source/common/pixel.cpp Wed Aug 24 13:17:45 2016 +0530
+++ b/source/common/pixel.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -845,30 +845,57 @@
/* Estimate the total amount of influence on future quality that could be had if we
* were to improve the reference samples used to inter predict any given CU. */
static void estimateCUPropagateCost(int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts,
- const int32_t* invQscales, const double* fpsFactor, int len)
+ const int32_t* invQscales, const double* fpsFactor, int len, uint32_t qgSize)
{
double fps = *fpsFactor / 256; // range[0.01, 1.00]
-
- for (int i = 0; i < len; i++)
+ if (qgSize == 8)
{
- int intraCost = intraCosts[i];
- int interCost = X265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);
- double propagateIntra = intraCost * invQscales[i]; // Q16 x Q8.8 = Q24.8
- double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0
- double propagateNum = (double)(intraCost - interCost); // Q32 - Q32 = Q33.0
+ for (int i = 0; i < len; i++)
+ {
+ int intraCost = intraCosts[i];
+ int interCost = X265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);
+ int invQscaleFactor = (invQscales[i * 2] + invQscales[i * 2 + 1] + invQscales[i * 2 + len * 2] + invQscales[i * 2 + len * 2 + 1]) / 4;
+ double propagateIntra = intraCost * invQscaleFactor; // Q16 x Q8.8 = Q24.8
+ double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0
+ double propagateNum = (double)(intraCost - interCost); // Q32 - Q32 = Q33.0
#if 0
- // algorithm that output match to asm
- float intraRcp = (float)1.0f / intraCost; // VC can't mapping this into RCPPS
- float intraRcpError1 = (float)intraCost * (float)intraRcp;
- intraRcpError1 *= (float)intraRcp;
- float intraRcpError2 = intraRcp + intraRcp;
- float propagateDenom = intraRcpError2 - intraRcpError1;
- dst[i] = (int)(propagateAmount * propagateNum * (double)propagateDenom + 0.5);
+ // algorithm whose output matches the asm
+ float intraRcp = (float)1.0f / intraCost; // VC can't mapping this into RCPPS
+ float intraRcpError1 = (float)intraCost * (float)intraRcp;
+ intraRcpError1 *= (float)intraRcp;
+ float intraRcpError2 = intraRcp + intraRcp;
+ float propagateDenom = intraRcpError2 - intraRcpError1;
+ dst[i] = (int)(propagateAmount * propagateNum * (double)propagateDenom + 0.5);
#else
- double propagateDenom = (double)intraCost; // Q32
- dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5);
+ double propagateDenom = (double)intraCost; // Q32
+ dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5);
#endif
+ }
+ }
+ else
+ {
+ for (int i = 0; i < len; i++)
+ {
+ int intraCost = intraCosts[i];
+ int interCost = X265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);
+ double propagateIntra = intraCost * invQscales[i]; // Q16 x Q8.8 = Q24.8
+ double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0
+ double propagateNum = (double)(intraCost - interCost); // Q32 - Q32 = Q33.0
+
+#if 0
+ // algorithm whose output matches the asm
+ float intraRcp = (float)1.0f / intraCost; // VC can't mapping this into RCPPS
+ float intraRcpError1 = (float)intraCost * (float)intraRcp;
+ intraRcpError1 *= (float)intraRcp;
+ float intraRcpError2 = intraRcp + intraRcp;
+ float propagateDenom = intraRcpError2 - intraRcpError1;
+ dst[i] = (int)(propagateAmount * propagateNum * (double)propagateDenom + 0.5);
+#else
+ double propagateDenom = (double)intraCost; // Q32
+ dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5);
+#endif
+ }
}
}
diff -r 215eedc9ecc0 -r ab205f07f87b source/common/primitives.h
--- a/source/common/primitives.h Wed Aug 24 13:17:45 2016 +0530
+++ b/source/common/primitives.h Wed Aug 17 10:47:11 2016 +0530
@@ -187,7 +187,7 @@
typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
typedef pixel (*planeClipAndMax_t)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix);
-typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len);
+typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len, uint32_t qgSize);
typedef void (*cutree_fix8_unpack)(double *dst, uint16_t *src, int count);
typedef void (*cutree_fix8_pack)(uint16_t *dst, double *src, int count);
diff -r 215eedc9ecc0 -r ab205f07f87b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Aug 24 13:17:45 2016 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -1027,7 +1027,7 @@
ALL_CHROMA_444_PU(p2s, filterPixelToShort, sse2);
ALL_LUMA_PU(convert_p2s, filterPixelToShort, sse2);
ALL_LUMA_TU(count_nonzero, count_nonzero, sse2);
- p.propagateCost = PFX(mbtree_propagate_cost_sse2);
+ //p.propagateCost = PFX(mbtree_propagate_cost_sse2);
}
if (cpuMask & X265_CPU_SSE3)
{
@@ -1312,7 +1312,7 @@
p.pu[LUMA_64x32].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x32_avx);
p.pu[LUMA_64x48].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x48_avx);
p.pu[LUMA_64x64].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x64_avx);
- p.propagateCost = PFX(mbtree_propagate_cost_avx);
+ //p.propagateCost = PFX(mbtree_propagate_cost_avx);
}
if (cpuMask & X265_CPU_XOP)
{
@@ -2153,7 +2153,7 @@
p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vsp = PFX(interp_4tap_vert_sp_64x64_avx2);
p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
- p.propagateCost = PFX(mbtree_propagate_cost_avx2);
+ //p.propagateCost = PFX(mbtree_propagate_cost_avx2);
p.fix8Unpack = PFX(cutree_fix8_unpack_avx2);
p.fix8Pack = PFX(cutree_fix8_pack_avx2);
@@ -2356,7 +2356,7 @@
ALL_CHROMA_444_PU(p2s, filterPixelToShort, sse2);
ALL_LUMA_PU(convert_p2s, filterPixelToShort, sse2);
ALL_LUMA_TU(count_nonzero, count_nonzero, sse2);
- p.propagateCost = PFX(mbtree_propagate_cost_sse2);
+ //p.propagateCost = PFX(mbtree_propagate_cost_sse2);
}
if (cpuMask & X265_CPU_SSE3)
{
@@ -2670,7 +2670,7 @@
p.pu[LUMA_48x64].copy_pp = PFX(blockcopy_pp_48x64_avx);
p.frameInitLowres = PFX(frame_init_lowres_core_avx);
- p.propagateCost = PFX(mbtree_propagate_cost_avx);
+ //p.propagateCost = PFX(mbtree_propagate_cost_avx);
}
if (cpuMask & X265_CPU_XOP)
{
@@ -3666,7 +3666,7 @@
p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vpp = PFX(interp_4tap_vert_pp_64x16_avx2);
p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
- p.propagateCost = PFX(mbtree_propagate_cost_avx2);
+ //p.propagateCost = PFX(mbtree_propagate_cost_avx2);
p.saoCuStatsE0 = PFX(saoCuStatsE0_avx2);
p.saoCuStatsE1 = PFX(saoCuStatsE1_avx2);
p.saoCuStatsE2 = PFX(saoCuStatsE2_avx2);
diff -r 215eedc9ecc0 -r ab205f07f87b source/common/x86/mc-a2.asm
--- a/source/common/x86/mc-a2.asm Wed Aug 24 13:17:45 2016 +0530
+++ b/source/common/x86/mc-a2.asm Wed Aug 17 10:47:11 2016 +0530
@@ -994,7 +994,7 @@
;-----------------------------------------------------------------------------
; void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, int32_t *intra_costs,
-; uint16_t *inter_costs, int32_t *inv_qscales, double *fps_factor, int len )
+; uint16_t *inter_costs, int32_t *inv_qscales, double *fps_factor, int len, uint32_t qgSize)
;-----------------------------------------------------------------------------
INIT_XMM sse2
cglobal mbtree_propagate_cost, 7,7,7
diff -r 215eedc9ecc0 -r ab205f07f87b source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Wed Aug 24 13:17:45 2016 +0530
+++ b/source/encoder/analysis.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -2637,7 +2637,11 @@
{
FrameData& curEncData = *m_frame->m_encData;
double qp = baseQp >= 0 ? baseQp : curEncData.m_cuStat[ctu.m_cuAddr].baseQp;
-
+ int loopIncr;
+ if (m_param->rc.qgSize == 8)
+ loopIncr = 8;
+ else
+ loopIncr = 16;
/* Use cuTree offsets if cuTree enabled and frame is referenced, else use AQ offsets */
bool isReferenced = IS_REFERENCED(m_frame);
double *qpoffs = (isReferenced && m_param->rc.cuTree) ? m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset;
@@ -2647,17 +2651,17 @@
uint32_t height = m_frame->m_fencPic->m_picHeight;
uint32_t block_x = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
uint32_t block_y = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
- uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) / 16;
+ uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr;
uint32_t blockSize = g_maxCUSize >> cuGeom.depth;
double qp_offset = 0;
uint32_t cnt = 0;
uint32_t idx;
- for (uint32_t block_yy = block_y; block_yy < block_y + blockSize && block_yy < height; block_yy += 16)
+ for (uint32_t block_yy = block_y; block_yy < block_y + blockSize && block_yy < height; block_yy += loopIncr)
{
- for (uint32_t block_xx = block_x; block_xx < block_x + blockSize && block_xx < width; block_xx += 16)
+ for (uint32_t block_xx = block_x; block_xx < block_x + blockSize && block_xx < width; block_xx += loopIncr)
{
- idx = ((block_yy / 16) * (maxCols)) + (block_xx / 16);
+ idx = ((block_yy / loopIncr) * (maxCols)) + (block_xx / loopIncr);
qp_offset += qpoffs[idx];
cnt++;
}
diff -r 215eedc9ecc0 -r ab205f07f87b source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Wed Aug 24 13:17:45 2016 +0530
+++ b/source/encoder/encoder.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -605,7 +605,11 @@
if (pic_in->quantOffsets != NULL)
{
- int cuCount = inFrame->m_lowres.maxBlocksInRow * inFrame->m_lowres.maxBlocksInCol;
+ int cuCount;
+ if (m_param->rc.qgSize == 8)
+ cuCount = inFrame->m_lowres.maxBlocksInRowFullRes * inFrame->m_lowres.maxBlocksInColFullRes;
+ else
+ cuCount = inFrame->m_lowres.maxBlocksInRow * inFrame->m_lowres.maxBlocksInCol;
memcpy(inFrame->m_quantOffsets, pic_in->quantOffsets, cuCount * sizeof(float));
}
@@ -790,7 +794,7 @@
if (m_rateControl->writeRateControlFrameStats(outFrame, &curEncoder->m_rce))
m_aborted = true;
if (pic_out)
- {
+ {
/* m_rcData is allocated for every frame */
pic_out->rcData = outFrame->m_rcData;
outFrame->m_rcData->qpaRc = outFrame->m_encData->m_avgQpRc;
@@ -1590,7 +1594,7 @@
{
pps->bUseDQP = true;
pps->maxCuDQPDepth = g_log2Size[m_param->maxCUSize] - g_log2Size[m_param->rc.qgSize];
- X265_CHECK(pps->maxCuDQPDepth <= 2, "max CU DQP depth cannot be greater than 2\n");
+ X265_CHECK(pps->maxCuDQPDepth <= 3, "max CU DQP depth cannot be greater than 3\n");
}
else
{
@@ -1874,10 +1878,10 @@
bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
if (!m_param->bLossless && (m_param->rc.aqMode || bIsVbv))
{
- if (p->rc.qgSize < X265_MAX(16, p->minCUSize))
+ if (p->rc.qgSize < X265_MAX(8, p->minCUSize))
{
- p->rc.qgSize = X265_MAX(16, p->minCUSize);
- x265_log(p, X265_LOG_WARNING, "QGSize should be greater than or equal to 16 and minCUSize, setting QGSize = %d\n", p->rc.qgSize);
+ p->rc.qgSize = X265_MAX(8, p->minCUSize);
+ x265_log(p, X265_LOG_WARNING, "QGSize should be greater than or equal to 8 and minCUSize, setting QGSize = %d\n", p->rc.qgSize);
}
if (p->rc.qgSize > p->maxCUSize)
{
diff -r 215eedc9ecc0 -r ab205f07f87b source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Wed Aug 24 13:17:45 2016 +0530
+++ b/source/encoder/ratecontrol.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -615,9 +615,18 @@
}
if (m_param->rc.cuTree)
{
- m_cuTreeStats.qpBuffer[0] = X265_MALLOC(uint16_t, m_ncu * sizeof(uint16_t));
- if (m_param->bBPyramid && m_param->rc.bStatRead)
- m_cuTreeStats.qpBuffer[1] = X265_MALLOC(uint16_t, m_ncu * sizeof(uint16_t));
+ if (m_param->rc.qgSize == 8)
+ {
+ m_cuTreeStats.qpBuffer[0] = X265_MALLOC(uint16_t, m_ncu * 4 * sizeof(uint16_t));
+ if (m_param->bBPyramid && m_param->rc.bStatRead)
+ m_cuTreeStats.qpBuffer[1] = X265_MALLOC(uint16_t, m_ncu * 4 * sizeof(uint16_t));
+ }
+ else
+ {
+ m_cuTreeStats.qpBuffer[0] = X265_MALLOC(uint16_t, m_ncu * sizeof(uint16_t));
+ if (m_param->bBPyramid && m_param->rc.bStatRead)
+ m_cuTreeStats.qpBuffer[1] = X265_MALLOC(uint16_t, m_ncu * sizeof(uint16_t));
+ }
m_cuTreeStats.qpBufPos = -1;
}
}
@@ -1424,6 +1433,11 @@
{
int index = m_encOrder[frame->m_poc];
uint8_t sliceTypeActual = (uint8_t)m_rce2Pass[index].sliceType;
+ int ncu;
+ if (m_param->rc.qgSize == 8)
+ ncu = m_ncu * 4;
+ else
+ ncu = m_ncu;
if (m_rce2Pass[index].keptAsRef)
{
/* TODO: We don't need pre-lookahead to measure AQ offsets, but there is currently
@@ -1437,7 +1451,7 @@
if (!fread(&type, 1, 1, m_cutreeStatFileIn))
goto fail;
- if (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], sizeof(uint16_t), m_ncu, m_cutreeStatFileIn) != (size_t)m_ncu)
+ if (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], sizeof(uint16_t), ncu, m_cutreeStatFileIn) != (size_t)ncu)
goto fail;
if (type != sliceTypeActual && m_cuTreeStats.qpBufPos == 1)
@@ -1448,8 +1462,8 @@
}
while(type != sliceTypeActual);
}
- primitives.fix8Unpack(frame->m_lowres.qpCuTreeOffset, m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], m_ncu);
- for (int i = 0; i < m_ncu; i++)
+ primitives.fix8Unpack(frame->m_lowres.qpCuTreeOffset, m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], ncu);
+ for (int i = 0; i < ncu; i++)
frame->m_lowres.invQscaleFactor[i] = x265_exp2fix8(frame->m_lowres.qpCuTreeOffset[i]);
m_cuTreeStats.qpBufPos--;
}
@@ -2593,6 +2607,11 @@
int RateControl::writeRateControlFrameStats(Frame* curFrame, RateControlEntry* rce)
{
FrameData& curEncData = *curFrame->m_encData;
+ int ncu;
+ if (m_param->rc.qgSize == 8)
+ ncu = m_ncu * 4;
+ else
+ ncu = m_ncu;
char cType = rce->sliceType == I_SLICE ? (rce->poc > 0 && m_param->bOpenGOP ? 'i' : 'I')
: rce->sliceType == P_SLICE ? 'P'
: IS_REFERENCED(curFrame) ? 'B' : 'b';
@@ -2612,10 +2631,10 @@
if (m_param->rc.cuTree && IS_REFERENCED(curFrame) && !m_param->rc.bStatRead)
{
uint8_t sliceType = (uint8_t)rce->sliceType;
- primitives.fix8Pack(m_cuTreeStats.qpBuffer[0], curFrame->m_lowres.qpCuTreeOffset, m_ncu);
+ primitives.fix8Pack(m_cuTreeStats.qpBuffer[0], curFrame->m_lowres.qpCuTreeOffset, ncu);
if (fwrite(&sliceType, 1, 1, m_cutreeStatFileOut) < 1)
goto writeFailure;
- if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), m_ncu, m_cutreeStatFileOut) < (size_t)m_ncu)
+ if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), ncu, m_cutreeStatFileOut) < (size_t)ncu)
goto writeFailure;
}
return 0;
diff -r 215eedc9ecc0 -r ab205f07f87b source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Wed Aug 24 13:17:45 2016 +0530
+++ b/source/encoder/slicetype.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -56,22 +56,36 @@
}
/* Find the energy of each block in Y/Cb/Cr plane */
-inline uint32_t acEnergyPlane(Frame *curFrame, pixel* src, intptr_t srcStride, int plane, int colorFormat)
+inline uint32_t acEnergyPlane(Frame *curFrame, pixel* src, intptr_t srcStride, int plane, int colorFormat, uint32_t qgSize)
{
if ((colorFormat != X265_CSP_I444) && plane)
{
- ALIGN_VAR_8(pixel, pix[8 * 8]);
- primitives.cu[BLOCK_8x8].copy_pp(pix, 8, src, srcStride);
- return acEnergyVar(curFrame, primitives.cu[BLOCK_8x8].var(pix, 8), 6, plane);
+ if (qgSize == 8)
+ {
+ ALIGN_VAR_4(pixel, pix[4 * 4]);
+ primitives.cu[BLOCK_4x4].copy_pp(pix, 4, src, srcStride);
+ return acEnergyVar(curFrame, primitives.cu[BLOCK_4x4].var(pix, 4), 4, plane);
+ }
+ else
+ {
+ ALIGN_VAR_8(pixel, pix[8 * 8]);
+ primitives.cu[BLOCK_8x8].copy_pp(pix, 8, src, srcStride);
+ return acEnergyVar(curFrame, primitives.cu[BLOCK_8x8].var(pix, 8), 6, plane);
+ }
}
else
- return acEnergyVar(curFrame, primitives.cu[BLOCK_16x16].var(src, srcStride), 8, plane);
+ {
+ if (qgSize == 8)
+ return acEnergyVar(curFrame, primitives.cu[BLOCK_8x8].var(src, srcStride), 6, plane);
+ else
+ return acEnergyVar(curFrame, primitives.cu[BLOCK_16x16].var(src, srcStride), 8, plane);
+ }
}
} // end anonymous namespace
/* Find the total AC energy of each block in all planes */
-uint32_t LookaheadTLD::acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp)
+uint32_t LookaheadTLD::acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize)
{
intptr_t stride = curFrame->m_fencPic->m_stride;
intptr_t cStride = curFrame->m_fencPic->m_strideC;
@@ -82,11 +96,11 @@
uint32_t var;
- var = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp);
+ var = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp, qgSize);
if (csp != X265_CSP_I400 && curFrame->m_fencPic->m_picCsp != X265_CSP_I400)
{
- var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] + blockOffsetChroma, cStride, 1, csp);
- var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] + blockOffsetChroma, cStride, 2, csp);
+ var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] + blockOffsetChroma, cStride, 1, csp, qgSize);
+ var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] + blockOffsetChroma, cStride, 2, csp, qgSize);
}
x265_emms();
return var;
@@ -97,7 +111,23 @@
/* Actual adaptive quantization */
int maxCol = curFrame->m_fencPic->m_picWidth;
int maxRow = curFrame->m_fencPic->m_picHeight;
- int blockCount = curFrame->m_lowres.maxBlocksInRow * curFrame->m_lowres.maxBlocksInCol;
+ int blockCount, loopIncr;
+ float modeOneConst, modeTwoConst;
+ if (param->rc.qgSize == 8)
+ {
+ blockCount = curFrame->m_lowres.maxBlocksInRowFullRes * curFrame->m_lowres.maxBlocksInColFullRes;
+ modeOneConst = 11.427f;
+ modeTwoConst = 8.f;
+ loopIncr = 8;
+ }
+ else
+ {
+ blockCount = widthInCU * heightInCU;
+ modeOneConst = 14.427f;
+ modeTwoConst = 11.f;
+ loopIncr = 16;
+ }
+ //int blockCount = curFrame->m_lowres.maxBlocksInRowFullRes * curFrame->m_lowres.maxBlocksInColFullRes;
float* quantOffsets = curFrame->m_quantOffsets;
for (int y = 0; y < 3; y++)
@@ -106,14 +136,14 @@
curFrame->m_lowres.wp_sum[y] = 0;
}
- /* Calculate Qp offset for each 16x16 block in the frame */
+ /* Calculate Qp offset for each 16x16 or 8x8 block in the frame */
int blockXY = 0;
int blockX = 0, blockY = 0;
double strength = 0.f;
if (param->rc.aqMode == X265_AQ_NONE || param->rc.aqStrength == 0)
{
/* Need to init it anyways for CU tree */
- int cuCount = widthInCU * heightInCU;
+ int cuCount = blockCount;
if (param->rc.aqMode && param->rc.aqStrength == 0)
{
@@ -137,9 +167,9 @@
/* Need variance data for weighted prediction */
if (param->bEnableWeightedPred || param->bEnableWeightedBiPred)
{
- for (blockY = 0; blockY < maxRow; blockY += 16)
- for (blockX = 0; blockX < maxCol; blockX += 16)
- acEnergyCu(curFrame, blockX, blockY, param->internalCsp);
+ for (blockY = 0; blockY < maxRow; blockY += loopIncr)
+ for (blockX = 0; blockX < maxCol; blockX += loopIncr)
+ acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
}
}
else
@@ -152,12 +182,12 @@
double bit_depth_correction = 1.f / (1 << (2*(X265_DEPTH-8)));
curFrame->m_lowres.frameVariance = 0;
uint64_t rowVariance = 0;
- for (blockY = 0; blockY < maxRow; blockY += 16)
+ for (blockY = 0; blockY < maxRow; blockY += loopIncr)
{
rowVariance = 0;
- for (blockX = 0; blockX < maxCol; blockX += 16)
+ for (blockX = 0; blockX < maxCol; blockX += loopIncr)
{
- uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp);
+ uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
curFrame->m_lowres.blockVariance[blockXY] = energy;
rowVariance += energy;
qp_adj = pow(energy * bit_depth_correction + 1, 0.1);
@@ -172,21 +202,21 @@
avg_adj /= blockCount;
avg_adj_pow2 /= blockCount;
strength = param->rc.aqStrength * avg_adj;
- avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (11.f)) / avg_adj;
+ avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (modeTwoConst)) / avg_adj;
bias_strength = param->rc.aqStrength;
}
else
strength = param->rc.aqStrength * 1.0397f;
blockXY = 0;
- for (blockY = 0; blockY < maxRow; blockY += 16)
+ for (blockY = 0; blockY < maxRow; blockY += loopIncr)
{
- for (blockX = 0; blockX < maxCol; blockX += 16)
+ for (blockX = 0; blockX < maxCol; blockX += loopIncr)
{
if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
{
qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
- qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - 11.f / (qp_adj * qp_adj));
+ qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - modeTwoConst / (qp_adj * qp_adj));
}
else if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
{
@@ -195,8 +225,8 @@
}
else
{
- uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp);
- qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (14.427f + 2 * (X265_DEPTH - 8)));
+ uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp,param->rc.qgSize);
+ qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (modeOneConst + 2 * (X265_DEPTH - 8)));
}
if (quantOffsets != NULL)
qp_adj += quantOffsets[blockXY];
@@ -227,7 +257,7 @@
}
}
-void LookaheadTLD::lowresIntraEstimate(Lowres& fenc)
+void LookaheadTLD::lowresIntraEstimate(Lowres& fenc, uint32_t qgSize)
{
ALIGN_VAR_32(pixel, prediction[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
pixel fencIntra[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
@@ -314,12 +344,21 @@
fenc.lowresCosts[0][0][cuXY] = (uint16_t)(X265_MIN(icost, LOWRES_COST_MASK) | (0 << LOWRES_COST_SHIFT));
fenc.intraCost[cuXY] = icost;
fenc.intraMode[cuXY] = (uint8_t)ilowmode;
-
- /* do not include edge blocks in the frame cost estimates, they are not very accurate */
+ /* Do not include edge blocks in the frame cost estimates;
+ * they are not very accurate. */
const bool bFrameScoreCU = (cuX > 0 && cuX < widthInCU - 1 &&
cuY > 0 && cuY < heightInCU - 1) || widthInCU <= 2 || heightInCU <= 2;
-
- int icostAq = (bFrameScoreCU && fenc.invQscaleFactor) ? ((icost * fenc.invQscaleFactor[cuXY] + 128) >> 8) : icost;
+ int invQscaleFactor, icostAq;
+ if (qgSize == 8)
+ {
+ invQscaleFactor = (fenc.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4] +
+ fenc.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + 1] +
+ fenc.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + fenc.maxBlocksInRowFullRes] +
+ fenc.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + fenc.maxBlocksInRowFullRes + 1]) / 4;
+ icostAq = (bFrameScoreCU && fenc.invQscaleFactor) ? ((icost * invQscaleFactor + 128) >> 8) : icost;
+ }
+ else
+ icostAq = (bFrameScoreCU && fenc.invQscaleFactor) ? ((icost * fenc.invQscaleFactor[cuXY] +128) >> 8) : icost;
if (bFrameScoreCU)
{
@@ -812,9 +851,17 @@
uint16_t lowresCuCost = curFrame->m_lowres.lowresCostForRc[lowresCuIdx] & LOWRES_COST_MASK;
if (qp_offset)
{
- lowresCuCost = (uint16_t)((lowresCuCost * x265_exp2fix8(qp_offset[lowresCuIdx]) + 128) >> 8);
+ double qpOffset;
+ if (m_param->rc.qgSize == 8)
+ qpOffset = (qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4] +
+ qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + 1] +
+ qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes] +
+ qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes + 1]) / 4;
+ else
+ qpOffset = qp_offset[lowresCuIdx];
+ lowresCuCost = (uint16_t)((lowresCuCost * x265_exp2fix8(qpOffset) + 128) >> 8);
int32_t intraCuCost = curFrame->m_lowres.intraCost[lowresCuIdx];
- curFrame->m_lowres.intraCost[lowresCuIdx] = (intraCuCost * x265_exp2fix8(qp_offset[lowresCuIdx]) + 128) >> 8;
+ curFrame->m_lowres.intraCost[lowresCuIdx] = (intraCuCost * x265_exp2fix8(qpOffset) + 128) >> 8;
}
if (m_param->bIntraRefresh && slice->m_sliceType == X265_TYPE_P)
for (uint32_t x = curFrame->m_encData->m_pir.pirStartCol; x <= curFrame->m_encData->m_pir.pirEndCol; x++)
@@ -850,7 +897,7 @@
/* cu-tree offsets were read from stats file */;
else if (m_lookahead.m_bAdaptiveQuant)
tld.calcAdaptiveQuantFrame(preFrame, m_lookahead.m_param);
- tld.lowresIntraEstimate(preFrame->m_lowres);
+ tld.lowresIntraEstimate(preFrame->m_lowres, m_lookahead.m_param->rc.qgSize);
preFrame->m_lowresInit = true;
m_lock.acquire();
@@ -1669,7 +1716,10 @@
if (bIntra)
{
memset(frames[0]->propagateCost, 0, m_cuCount * sizeof(uint16_t));
- memcpy(frames[0]->qpCuTreeOffset, frames[0]->qpAqOffset, m_cuCount * sizeof(double));
+ if (m_param->rc.qgSize == 8)
+ memcpy(frames[0]->qpCuTreeOffset, frames[0]->qpAqOffset, m_cuCount * 4 * sizeof(double));
+ else
+ memcpy(frames[0]->qpCuTreeOffset, frames[0]->qpAqOffset, m_cuCount * sizeof(double));
return;
}
std::swap(frames[lastnonb]->propagateCost, frames[0]->propagateCost);
@@ -1764,9 +1814,14 @@
for (uint16_t blocky = 0; blocky < m_8x8Height; blocky++)
{
int cuIndex = blocky * strideInCU;
- primitives.propagateCost(m_scratch, propagateCost,
- frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
- frames[b]->invQscaleFactor + cuIndex, &fpsFactor, m_8x8Width);
+ if (m_param->rc.qgSize == 8)
+ primitives.propagateCost(m_scratch, propagateCost,
+ frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
+ frames[b]->invQscaleFactor + (cuIndex * 4), &fpsFactor, m_8x8Width, m_param->rc.qgSize);
+ else
+ primitives.propagateCost(m_scratch, propagateCost,
+ frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
+ frames[b]->invQscaleFactor + cuIndex, &fpsFactor, m_8x8Width, m_param->rc.qgSize);
if (referenced)
propagateCost += m_8x8Width;
@@ -1852,14 +1907,43 @@
if (ref0Distance && frame->weightedCostDelta[ref0Distance - 1] > 0)
weightdelta = (1.0 - frame->weightedCostDelta[ref0Distance - 1]);
- for (int cuIndex = 0; cuIndex < m_cuCount; cuIndex++)
+ if (m_param->rc.qgSize == 8)
{
- int intracost = (frame->intraCost[cuIndex] * frame->invQscaleFactor[cuIndex] + 128) >> 8;
- if (intracost)
+ for (int cuY = 0; cuY < m_8x8Height; cuY++)
{
- int propagateCost = (frame->propagateCost[cuIndex] * fpsFactor + 128) >> 8;
- double log2_ratio = X265_LOG2(intracost + propagateCost) - X265_LOG2(intracost) + weightdelta;
- frame->qpCuTreeOffset[cuIndex] = frame->qpAqOffset[cuIndex] - m_cuTreeStrength * log2_ratio;
+ for (int cuX = 0; cuX < m_8x8Width; cuX++)
+ {
+ const int cuXY = cuX + cuY * m_8x8Width;
+ int invQscaleFactor = (frame->invQscaleFactor[cuX * 2 + cuY * m_8x8Width * 4] +
+ frame->invQscaleFactor[cuX * 2 + cuY * m_8x8Width * 4 + 1] +
+ frame->invQscaleFactor[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes] +
+ frame->invQscaleFactor[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes + 1]) / 4;
+
+ int intracost = ((frame->intraCost[cuXY]) / 4 * invQscaleFactor + 128) >> 8;
+ if (intracost)
+ {
+ int propagateCost = ((frame->propagateCost[cuXY]) / 4 * fpsFactor + 128) >> 8;
+ double log2_ratio = X265_LOG2(intracost + propagateCost) - X265_LOG2(intracost) + weightdelta;
+ frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4] - m_cuTreeStrength * (log2_ratio);
+ frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4 + 1] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4 + 1] - m_cuTreeStrength * (log2_ratio);
+ frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes] - m_cuTreeStrength * (log2_ratio);
+ frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes + 1] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4 + frame->maxBlocksInRowFullRes + 1] - m_cuTreeStrength * (log2_ratio);
+ }
+
+ }
+ }
+ }
+ else
+ {
+ for (int cuIndex = 0; cuIndex < m_cuCount; cuIndex++)
+ {
+ int intracost = (frame->intraCost[cuIndex] * frame->invQscaleFactor[cuIndex] + 128) >> 8;
+ if (intracost)
+ {
+ int propagateCost = (frame->propagateCost[cuIndex] * fpsFactor + 128) >> 8;
+ double log2_ratio = X265_LOG2(intracost + propagateCost) - X265_LOG2(intracost) + weightdelta;
+ frame->qpCuTreeOffset[cuIndex] = frame->qpAqOffset[cuIndex] - m_cuTreeStrength * log2_ratio;
+ }
}
}
}
@@ -1883,7 +1967,14 @@
{
int cuxy = cux + cuy * m_8x8Width;
int cuCost = frames[b]->lowresCosts[b - p0][p1 - b][cuxy] & LOWRES_COST_MASK;
- double qp_adj = qp_offset[cuxy];
+ double qp_adj;
+ if (m_param->rc.qgSize == 8)
+ qp_adj = (qp_offset[cux * 2 + cuy * m_8x8Width * 4] +
+ qp_offset[cux * 2 + cuy * m_8x8Width * 4 + 1] +
+ qp_offset[cux * 2 + cuy * m_8x8Width * 4 + frames[b]->maxBlocksInRowFullRes] +
+ qp_offset[cux * 2 + cuy * m_8x8Width * 4 + frames[b]->maxBlocksInRowFullRes + 1]) / 4;
+ else
+ qp_adj = qp_offset[cuxy];
cuCost = (cuCost * x265_exp2fix8(qp_adj) + 128) >> 8;
rowSatd[cuy] += cuCost;
if ((cuy > 0 && cuy < m_8x8Height - 1 &&
@@ -2202,8 +2293,17 @@
/* do not include edge blocks in the frame cost estimates, they are not very accurate */
const bool bFrameScoreCU = (cuX > 0 && cuX < widthInCU - 1 &&
cuY > 0 && cuY < heightInCU - 1) || widthInCU <= 2 || heightInCU <= 2;
-
- int bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * fenc->invQscaleFactor[cuXY] + 128) >> 8) : bcost;
+ int invQscaleFactor, bcostAq;
+ if (m_lookahead.m_param->rc.qgSize == 8)
+ {
+ invQscaleFactor = (fenc->invQscaleFactor[cuX * 2 + cuY * widthInCU * 4] +
+ fenc->invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + 1] +
+ fenc->invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + fenc->maxBlocksInRowFullRes] +
+ fenc->invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + fenc->maxBlocksInRowFullRes + 1]) / 4;
+ bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * invQscaleFactor + 128) >> 8) : bcost;
+ }
+ else
+ bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * fenc->invQscaleFactor[cuXY] +128) >> 8) : bcost;
if (bFrameScoreCU)
{
diff -r 215eedc9ecc0 -r ab205f07f87b source/encoder/slicetype.h
--- a/source/encoder/slicetype.h Wed Aug 24 13:17:45 2016 +0530
+++ b/source/encoder/slicetype.h Wed Aug 17 10:47:11 2016 +0530
@@ -84,13 +84,13 @@
~LookaheadTLD() { X265_FREE(wbuffer[0]); }
void calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param);
- void lowresIntraEstimate(Lowres& fenc);
+ void lowresIntraEstimate(Lowres& fenc, uint32_t qgSize);
void weightsAnalyse(Lowres& fenc, Lowres& ref);
protected:
- uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp);
+ uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize);
uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
bool allocWeightedRef(Lowres& fenc);
};
diff -r 215eedc9ecc0 -r ab205f07f87b source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Wed Aug 24 13:17:45 2016 +0530
+++ b/source/test/pixelharness.cpp Wed Aug 17 10:47:11 2016 +0530
@@ -1387,8 +1387,8 @@
{
int width = 16 + rand() % 64;
int index = i % TEST_CASES;
- checked(opt, opt_dest, ushort_test_buff[index] + j, int_test_buff[index] + j, ushort_test_buff[index] + j, int_test_buff[index] + j, &fps, width);
- ref(ref_dest, ushort_test_buff[index] + j, int_test_buff[index] + j, ushort_test_buff[index] + j, int_test_buff[index] + j, &fps, width);
+ checked(opt, opt_dest, ushort_test_buff[index] + j, int_test_buff[index] + j, ushort_test_buff[index] + j, int_test_buff[index] + j, &fps, width, 32);
+ ref(ref_dest, ushort_test_buff[index] + j, int_test_buff[index] + j, ushort_test_buff[index] + j, int_test_buff[index] + j, &fps, width, 32);
if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
return false;
@@ -3102,7 +3102,7 @@
if (opt.propagateCost)
{
HEADER0("propagateCost");
- REPORT_SPEEDUP(opt.propagateCost, ref.propagateCost, ibuf1, ushort_test_buff[0], int_test_buff[0], ushort_test_buff[0], int_test_buff[0], double_test_buff[0], 80);
+ REPORT_SPEEDUP(opt.propagateCost, ref.propagateCost, ibuf1, ushort_test_buff[0], int_test_buff[0], ushort_test_buff[0], int_test_buff[0], double_test_buff[0], 80, 32);
}
if (opt.fix8Pack)
diff -r 215eedc9ecc0 -r ab205f07f87b source/x265.h
--- a/source/x265.h Wed Aug 24 13:17:45 2016 +0530
+++ b/source/x265.h Wed Aug 17 10:47:11 2016 +0530
@@ -1155,7 +1155,7 @@
/* Enable adaptive quantization at CU granularity. This parameter specifies
* the minimum CU size at which QP can be adjusted, i.e. Quantization Group
- * (QG) size. Allowed values are 64, 32, 16 provided it falls within the
+ * (QG) size. Allowed values are 64, 32, 16, 8 provided it falls within the
* inclusive range [maxCUSize, minCUSize]. Experimental, default: maxCUSize */
uint32_t qgSize;
diff -r 215eedc9ecc0 -r ab205f07f87b source/x265cli.h
--- a/source/x265cli.h Wed Aug 24 13:17:45 2016 +0530
+++ b/source/x265cli.h Wed Aug 17 10:47:11 2016 +0530
@@ -386,7 +386,7 @@
H0(" --analysis-file <filename> Specify file name used for either dumping or reading analysis data.\n");
H0(" --aq-mode <integer> Mode for Adaptive Quantization - 0:none 1:uniform AQ 2:auto variance 3:auto variance with bias to dark scenes. Default %d\n", param->rc.aqMode);
H0(" --aq-strength <float> Reduces blocking and blurring in flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength);
- H0(" --qg-size <int> Specifies the size of the quantization group (64, 32, 16). Default %d\n", param->rc.qgSize);
+ H0(" --qg-size <int> Specifies the size of the quantization group (64, 32, 16, 8). Default %d\n", param->rc.qgSize);
H0(" --[no-]cutree Enable cutree for Adaptive Quantization. Default %s\n", OPT(param->rc.cuTree));
H0(" --[no-]rc-grain Enable ratecontrol mode to handle grains specifically. turned on with tune grain. Default %s\n", OPT(param->rc.bEnableGrain));
H1(" --ipratio <float> QP factor between I and P. Default %.2f\n", param->rc.ipFactor);
More information about the x265-devel
mailing list