[x265] [PATCH] aq: implementation of fine-grained adaptive quantization
deepthi at multicorewareinc.com
deepthi at multicorewareinc.com
Sun Apr 5 18:33:02 CEST 2015
# HG changeset patch
# User Deepthi Nandakumar <deepthi at multicorewareinc.com>
# Date 1427100822 -19800
# Mon Mar 23 14:23:42 2015 +0530
# Node ID d6e059bd8a9cd0cb9aad7444b1a141a59ac01193
# Parent 335c728bbd62018e1e3ed03a4df0514c213e9a4e
aq: implementation of fine-grained adaptive quantization
Currently adaptive quantization adjusts the QP values on 64x64 pixel Coding Tree
Units (CTUs) across a video frame. The new param option --qg-size will
enable QP to be adjusted for individual quantization groups (QGs) of size 64/32/16.
diff -r 335c728bbd62 -r d6e059bd8a9c doc/reST/cli.rst
--- a/doc/reST/cli.rst Fri Apr 03 14:27:32 2015 -0500
+++ b/doc/reST/cli.rst Mon Mar 23 14:23:42 2015 +0530
@@ -1111,6 +1111,13 @@
**Range of values:** 0.0 to 3.0
+.. option:: --qg-size <64|32|16>
+ Enable adaptive quantization for sub-CTUs. This parameter specifies
+ the minimum CU size at which QP can be adjusted, i.e. the Quantization Group
+ size. Allowed values are 64, 32, 16, provided the value falls within
+ the inclusive range [minCUSize, maxCUSize]. Experimental.
+ Default: same as maxCUSize
+
.. option:: --cutree, --no-cutree
Enable the use of lookahead's lowres motion vector fields to
diff -r 335c728bbd62 -r d6e059bd8a9c source/common/cudata.cpp
--- a/source/common/cudata.cpp Fri Apr 03 14:27:32 2015 -0500
+++ b/source/common/cudata.cpp Mon Mar 23 14:23:42 2015 +0530
@@ -298,7 +298,7 @@
}
// initialize Sub partition
-void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom)
+void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp)
{
m_absIdxInCTU = cuGeom.absPartIdx;
m_encData = ctu.m_encData;
@@ -312,8 +312,8 @@
m_cuAboveRight = ctu.m_cuAboveRight;
X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n");
- /* sequential memsets */
- m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
+ m_partSet((uint8_t*)m_qp, (uint8_t)qp);
+
m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize);
m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless);
diff -r 335c728bbd62 -r d6e059bd8a9c source/common/cudata.h
--- a/source/common/cudata.h Fri Apr 03 14:27:32 2015 -0500
+++ b/source/common/cudata.h Mon Mar 23 14:23:42 2015 +0530
@@ -182,7 +182,7 @@
static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]);
void initCTU(const Frame& frame, uint32_t cuAddr, int qp);
- void initSubCU(const CUData& ctu, const CUGeom& cuGeom);
+ void initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp);
void initLosslessCU(const CUData& cu, const CUGeom& cuGeom);
void copyPartFrom(const CUData& cu, const CUGeom& childGeom, uint32_t subPartIdx);
diff -r 335c728bbd62 -r d6e059bd8a9c source/common/param.cpp
--- a/source/common/param.cpp Fri Apr 03 14:27:32 2015 -0500
+++ b/source/common/param.cpp Mon Mar 23 14:23:42 2015 +0530
@@ -209,6 +209,7 @@
param->rc.zones = NULL;
param->rc.bEnableSlowFirstPass = 0;
param->rc.bStrictCbr = 0;
+ param->rc.QGSize = 64; /* Same as maxCUSize */
/* Video Usability Information (VUI) */
param->vui.aspectRatioIdc = 0;
@@ -263,6 +264,7 @@
param->rc.aqStrength = 0.0;
param->rc.aqMode = X265_AQ_NONE;
param->rc.cuTree = 0;
+ param->rc.QGSize = 32;
param->bEnableFastIntra = 1;
}
else if (!strcmp(preset, "superfast"))
@@ -279,6 +281,7 @@
param->rc.aqStrength = 0.0;
param->rc.aqMode = X265_AQ_NONE;
param->rc.cuTree = 0;
+ param->rc.QGSize = 32;
param->bEnableSAO = 0;
param->bEnableFastIntra = 1;
}
@@ -292,6 +295,7 @@
param->rdLevel = 2;
param->maxNumReferences = 1;
param->rc.cuTree = 0;
+ param->rc.QGSize = 32;
param->bEnableFastIntra = 1;
}
else if (!strcmp(preset, "faster"))
@@ -843,6 +847,7 @@
OPT2("pools", "numa-pools") p->numaPools = strdup(value);
OPT("lambda-file") p->rc.lambdaFileName = strdup(value);
OPT("analysis-file") p->analysisFileName = strdup(value);
+ OPT("qg-size") p->rc.QGSize = atoi(value);
else
return X265_PARAM_BAD_NAME;
#undef OPT
diff -r 335c728bbd62 -r d6e059bd8a9c source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Fri Apr 03 14:27:32 2015 -0500
+++ b/source/encoder/analysis.cpp Mon Mar 23 14:23:42 2015 +0530
@@ -75,6 +75,8 @@
m_reuseInterDataCTU = NULL;
m_reuseRef = NULL;
m_reuseBestMergeCand = NULL;
+ for (int i = 0; i < NUM_CU_DEPTH; i++)
+ m_qp[i] = NULL;
}
bool Analysis::create(ThreadLocalData *tld)
@@ -101,6 +103,7 @@
ok &= md.pred[j].reconYuv.create(cuSize, csp);
md.pred[j].fencYuv = &md.fencYuv;
}
+ m_qp[depth] = X265_MALLOC(int, 1i64 << (depth << 1));
}
return ok;
@@ -118,6 +121,7 @@
m_modeDepth[i].pred[j].predYuv.destroy();
m_modeDepth[i].pred[j].reconYuv.destroy();
}
+ X265_FREE(m_qp[i]);
}
}
@@ -132,6 +136,34 @@
m_modeDepth[i].pred[j].invalidate();
#endif
invalidateContexts(0);
+ if (m_slice->m_pps->bUseDQP)
+ {
+ CUGeom *curCUGeom = (CUGeom *)&cuGeom;
+ CUGeom *parentGeom = (CUGeom *)&cuGeom;
+
+ m_qp[0][0] = calculateQpforCuSize(ctu, *curCUGeom);
+ curCUGeom = curCUGeom + curCUGeom->childOffset;
+ parentGeom = curCUGeom;
+ if (m_slice->m_pps->maxCuDQPDepth >= 1)
+ {
+ for (int i = 0; i < 4; i++)
+ {
+ m_qp[1][i] = calculateQpforCuSize(ctu, *(parentGeom + i));
+ if (m_slice->m_pps->maxCuDQPDepth == 2)
+ {
+ curCUGeom = parentGeom + i + (parentGeom + i)->childOffset;
+ for (int j = 0; j < 4; j++)
+ m_qp[2][i * 4 + j] = calculateQpforCuSize(ctu, *(curCUGeom + j));
+ }
+ }
+ }
+ this->setQP(*m_slice, m_qp[0][0]);
+ m_qp[0][0] = x265_clip3(QP_MIN, QP_MAX_SPEC, m_qp[0][0]);
+ ctu.setQPSubParts((int8_t)m_qp[0][0], 0, 0);
+ }
+ else
+ m_qp[0][0] = m_slice->m_sliceQp;
+
m_quant.setQPforQuant(ctu);
m_rqt[0].cur.load(initialContext);
m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);
@@ -155,7 +187,7 @@
uint32_t zOrder = 0;
if (m_slice->m_sliceType == I_SLICE)
{
- compressIntraCU(ctu, cuGeom, zOrder);
+ compressIntraCU(ctu, cuGeom, zOrder, m_qp[0][0], 0);
if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
{
CUData *bestCU = &m_modeDepth[0].bestMode->cu;
@@ -173,18 +205,18 @@
* they are available for intra predictions */
m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);
- compressInterCU_rd0_4(ctu, cuGeom);
+ compressInterCU_rd0_4(ctu, cuGeom, m_qp[0][0], 0);
/* generate residual for entire CTU at once and copy to reconPic */
encodeResidue(ctu, cuGeom);
}
else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >= 2)
- compressInterCU_dist(ctu, cuGeom);
+ compressInterCU_dist(ctu, cuGeom, m_qp[0][0], 0);
else if (m_param->rdLevel <= 4)
- compressInterCU_rd0_4(ctu, cuGeom);
+ compressInterCU_rd0_4(ctu, cuGeom, m_qp[0][0], 0);
else
{
- compressInterCU_rd5_6(ctu, cuGeom, zOrder);
+ compressInterCU_rd5_6(ctu, cuGeom, zOrder, m_qp[0][0], 0);
if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.interData)
{
CUData *bestCU = &m_modeDepth[0].bestMode->cu;
@@ -223,7 +255,7 @@
}
}
-void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t& zOrder)
+void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t& zOrder, int32_t qp, uint32_t partIdx)
{
uint32_t depth = cuGeom.depth;
ModeDepth& md = m_modeDepth[depth];
@@ -232,6 +264,13 @@
bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
+ if (m_slice->m_pps->bUseDQP && depth && depth <= m_slice->m_pps->maxCuDQPDepth)
+ {
+ qp = m_qp[depth][partIdx];
+ this->setQP(*m_slice, qp);
+ qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
+ }
+
if (m_param->analysisMode == X265_ANALYSIS_LOAD)
{
uint8_t* reuseDepth = &m_reuseIntraDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
@@ -241,11 +280,10 @@
if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.absPartIdx)
{
- m_quant.setQPforQuant(parentCTU);
-
PartSize size = (PartSize)reusePartSizes[zOrder];
Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : md.pred[PRED_INTRA_NxN];
- mode.cu.initSubCU(parentCTU, cuGeom);
+ mode.cu.initSubCU(parentCTU, cuGeom, qp);
+ m_quant.setQPforQuant(mode.cu);
checkIntra(mode, cuGeom, size, &reuseModes[zOrder], &reuseChromaModes[zOrder]);
checkBestMode(mode, depth);
@@ -262,15 +300,14 @@
}
else if (mightNotSplit)
{
- m_quant.setQPforQuant(parentCTU);
-
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
+ m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
checkBestMode(md.pred[PRED_INTRA], depth);
if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
{
- md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, NULL);
checkBestMode(md.pred[PRED_INTRA_NxN], depth);
}
@@ -287,7 +324,7 @@
Mode* splitPred = &md.pred[PRED_SPLIT];
splitPred->initCosts();
CUData* splitCU = &splitPred->cu;
- splitCU->initSubCU(parentCTU, cuGeom);
+ splitCU->initSubCU(parentCTU, cuGeom, qp);
uint32_t nextDepth = depth + 1;
ModeDepth& nd = m_modeDepth[nextDepth];
@@ -301,7 +338,7 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressIntraCU(parentCTU, childGeom, zOrder);
+ compressIntraCU(parentCTU, childGeom, zOrder, qp, partIdx * 4 + subPartIdx);
// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -490,7 +527,7 @@
while (task >= 0);
}
-void Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom)
+void Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, uint32_t partIdx)
{
uint32_t depth = cuGeom.depth;
uint32_t cuAddr = parentCTU.m_cuAddr;
@@ -503,6 +540,13 @@
X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not support RD 0 or 1\n");
+ if (m_slice->m_pps->bUseDQP && depth && depth <= m_slice->m_pps->maxCuDQPDepth)
+ {
+ qp = m_qp[depth][partIdx];
+ this->setQP(*m_slice, qp);
+ qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
+ }
+
if (mightNotSplit && depth >= minDepth)
{
int bTryAmp = m_slice->m_sps->maxAMPDepth > depth && (cuGeom.log2CUSize < 6 || m_param->rdLevel > 4);
@@ -511,28 +555,28 @@
PMODE pmode(*this, cuGeom);
/* Initialize all prediction CUs based on parentCTU */
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
if (bTryIntra)
{
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3 && m_param->rdLevel >= 5)
- md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
pmode.modes[pmode.m_jobTotal++] = PRED_INTRA;
}
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_2Nx2N;
- md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2Nx2N;
+ md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
if (m_param->bEnableRectInter)
{
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_2NxN;
- md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_Nx2N;
+ md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxN;
+ md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_Nx2N;
}
if (bTryAmp)
{
- md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnU;
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnD;
- md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_nLx2N;
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_nRx2N;
+ md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnU;
+ md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnD;
+ md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_nLx2N;
+ md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_nRx2N;
}
pmode.tryBondPeers(*m_frame->m_encData->m_jobProvider, pmode.m_jobTotal);
@@ -662,7 +706,7 @@
if (md.bestMode->rdCost == MAX_INT64 && !bTryIntra)
{
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
checkBestMode(md.pred[PRED_INTRA], depth);
@@ -688,7 +732,7 @@
Mode* splitPred = &md.pred[PRED_SPLIT];
splitPred->initCosts();
CUData* splitCU = &splitPred->cu;
- splitCU->initSubCU(parentCTU, cuGeom);
+ splitCU->initSubCU(parentCTU, cuGeom, qp);
uint32_t nextDepth = depth + 1;
ModeDepth& nd = m_modeDepth[nextDepth];
@@ -702,7 +746,7 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_dist(parentCTU, childGeom);
+ compressInterCU_dist(parentCTU, childGeom, qp, partIdx * 4 + subPartIdx);
// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -741,7 +785,7 @@
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
}
-void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom)
+void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, uint32_t partIdx)
{
uint32_t depth = cuGeom.depth;
uint32_t cuAddr = parentCTU.m_cuAddr;
@@ -752,13 +796,20 @@
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
+ if (m_slice->m_pps->bUseDQP && depth && depth <= m_slice->m_pps->maxCuDQPDepth)
+ {
+ qp = m_qp[depth][partIdx];
+ this->setQP(*m_slice, qp);
+ qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
+ }
+
if (mightNotSplit && depth >= minDepth)
{
bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
/* Compute Merge Cost */
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
bool earlyskip = false;
@@ -767,24 +818,24 @@
if (!earlyskip)
{
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N);
if (m_slice->m_sliceType == B_SLICE)
{
- md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
}
Mode *bestInter = &md.pred[PRED_2Nx2N];
if (m_param->bEnableRectInter)
{
- md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N);
if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_Nx2N];
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN);
if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxN];
@@ -806,24 +857,24 @@
if (bHor)
{
- md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU);
if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxnU];
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD);
if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxnD];
}
if (bVer)
{
- md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N);
if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_nLx2N];
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N);
if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_nRx2N];
@@ -855,7 +906,7 @@
if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
md.bestMode->sa8dCost == MAX_INT64)
{
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
checkBestMode(md.pred[PRED_INTRA], depth);
@@ -873,7 +924,7 @@
if (bTryIntra || md.bestMode->sa8dCost == MAX_INT64)
{
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
if (md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
md.bestMode = &md.pred[PRED_INTRA];
@@ -960,7 +1011,7 @@
Mode* splitPred = &md.pred[PRED_SPLIT];
splitPred->initCosts();
CUData* splitCU = &splitPred->cu;
- splitCU->initSubCU(parentCTU, cuGeom);
+ splitCU->initSubCU(parentCTU, cuGeom, qp);
uint32_t nextDepth = depth + 1;
ModeDepth& nd = m_modeDepth[nextDepth];
@@ -974,7 +1025,7 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_rd0_4(parentCTU, childGeom);
+ compressInterCU_rd0_4(parentCTU, childGeom, qp, partIdx * 4 + subPartIdx);
// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1025,7 +1076,7 @@
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
}
-void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder)
+void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp, uint32_t partIdx)
{
uint32_t depth = cuGeom.depth;
ModeDepth& md = m_modeDepth[depth];
@@ -1034,14 +1085,21 @@
bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
+ if (m_slice->m_pps->bUseDQP && depth && depth <= m_slice->m_pps->maxCuDQPDepth)
+ {
+ qp = m_qp[depth][partIdx];
+ this->setQP(*m_slice, qp);
+ qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
+ }
+
if (m_param->analysisMode == X265_ANALYSIS_LOAD)
{
uint8_t* reuseDepth = &m_reuseInterDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
uint8_t* reuseModes = &m_reuseInterDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.absPartIdx && reuseModes[zOrder] == MODE_SKIP)
{
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom, true);
if (m_bTryLossless)
@@ -1060,20 +1118,20 @@
if (mightNotSplit)
{
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom, false);
bool earlySkip = m_param->bEnableEarlySkip && md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
if (!earlySkip)
{
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
if (m_slice->m_sliceType == B_SLICE)
{
- md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
{
@@ -1084,11 +1142,11 @@
if (m_param->bEnableRectInter)
{
- md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, false);
checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, false);
checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
}
@@ -1111,21 +1169,21 @@
if (bHor)
{
- md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, bMergeOnly);
checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, bMergeOnly);
checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
}
if (bVer)
{
- md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, bMergeOnly);
checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, bMergeOnly);
checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
}
@@ -1133,13 +1191,13 @@
if (m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames)
{
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
checkBestMode(md.pred[PRED_INTRA], depth);
if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
{
- md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, NULL);
checkBestMode(md.pred[PRED_INTRA_NxN], depth);
}
@@ -1159,7 +1217,7 @@
Mode* splitPred = &md.pred[PRED_SPLIT];
splitPred->initCosts();
CUData* splitCU = &splitPred->cu;
- splitCU->initSubCU(parentCTU, cuGeom);
+ splitCU->initSubCU(parentCTU, cuGeom, qp);
uint32_t nextDepth = depth + 1;
ModeDepth& nd = m_modeDepth[nextDepth];
@@ -1173,7 +1231,7 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_rd5_6(parentCTU, childGeom, zOrder);
+ compressInterCU_rd5_6(parentCTU, childGeom, zOrder, qp, partIdx * 4 + subPartIdx);
// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1913,7 +1971,7 @@
return false;
}
-int Analysis::calculateQpforCuSize(CUData& ctu, const CUGeom& cuGeom)
+int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom)
{
uint32_t ctuAddr = ctu.m_cuAddr;
FrameData& curEncData = *m_frame->m_encData;
diff -r 335c728bbd62 -r d6e059bd8a9c source/encoder/analysis.h
--- a/source/encoder/analysis.h Fri Apr 03 14:27:32 2015 -0500
+++ b/source/encoder/analysis.h Mon Mar 23 14:23:42 2015 +0530
@@ -90,6 +90,7 @@
void processPmode(PMODE& pmode, Analysis& slave);
ModeDepth m_modeDepth[NUM_CU_DEPTH];
+ int* m_qp[NUM_CU_DEPTH];
bool m_bTryLossless;
bool m_bChromaSa8d;
@@ -109,12 +110,12 @@
uint32_t* m_reuseBestMergeCand;
/* full analysis for an I-slice CU */
- void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
+ void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qpDepth, uint32_t partIdx);
/* full analysis for a P or B slice CU */
- void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom);
- void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom);
- void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
+ void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qpDepth, uint32_t partIdx);
+ void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qpDepth, uint32_t partIdx);
+ void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qpDepth, uint32_t partIdx);
/* measure merge and skip */
void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom);
@@ -139,7 +140,7 @@
/* generate residual and recon pixels for an entire CTU recursively (RD0) */
void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom);
- int calculateQpforCuSize(CUData& ctu, const CUGeom& cuGeom);
+ int calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom);
/* check whether current mode is the new best */
inline void checkBestMode(Mode& mode, uint32_t depth)
diff -r 335c728bbd62 -r d6e059bd8a9c source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Apr 03 14:27:32 2015 -0500
+++ b/source/encoder/encoder.cpp Mon Mar 23 14:23:42 2015 +0530
@@ -1557,15 +1557,12 @@
bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
if (!m_param->bLossless && (m_param->rc.aqMode || bIsVbv))
- {
pps->bUseDQP = true;
- pps->maxCuDQPDepth = 0; /* TODO: make configurable? */
- }
else
- {
pps->bUseDQP = false;
- pps->maxCuDQPDepth = 0;
- }
+
+ pps->maxCuDQPDepth = g_log2Size[m_param->maxCUSize] - g_log2Size[m_param->rc.QGSize];
+ X265_CHECK(pps->maxCuDQPDepth <= 2, "max CU DQP depth cannot be greater than 2");
pps->chromaQpOffset[0] = m_param->cbQpOffset;
pps->chromaQpOffset[1] = m_param->crQpOffset;
@@ -1788,6 +1785,22 @@
p->analysisMode = X265_ANALYSIS_OFF;
x265_log(p, X265_LOG_WARNING, "Analysis save and load mode not supported for distributed mode analysis\n");
}
+
+ bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
+ if (!m_param->bLossless && (m_param->rc.aqMode || bIsVbv))
+ {
+ if (p->rc.QGSize < X265_MAX(16, p->minCUSize))
+ {
+ p->rc.QGSize = X265_MAX(16, p->minCUSize);
+ x265_log(p, X265_LOG_WARNING, "QGSize should be greater than or equal to 16 and minCUSize, setting QGSize = %d \n", p->rc.QGSize);
+ }
+
+ if (p->rc.QGSize > p->maxCUSize)
+ {
+ p->rc.QGSize = p->maxCUSize;
+ x265_log(p, X265_LOG_WARNING, "QGSize should be less than or equal to maxCUSize, setting QGSize = %d \n", p->rc.QGSize);
+ }
+ }
}
void Encoder::allocAnalysis(x265_analysis_data* analysis)
diff -r 335c728bbd62 -r d6e059bd8a9c source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Fri Apr 03 14:27:32 2015 -0500
+++ b/source/encoder/frameencoder.cpp Mon Mar 23 14:23:42 2015 +0530
@@ -852,9 +852,7 @@
if (m_param->rc.aqMode || bIsVbv)
{
int qp = calcQpForCu(cuAddr, curEncData.m_cuStat[cuAddr].baseQp);
- tld.analysis.setQP(*slice, qp);
qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
- ctu->setQPSubParts((int8_t)qp, 0, 0);
curEncData.m_rowStat[row].sumQpAq += qp;
}
else
diff -r 335c728bbd62 -r d6e059bd8a9c source/x265.h
--- a/source/x265.h Fri Apr 03 14:27:32 2015 -0500
+++ b/source/x265.h Mon Mar 23 14:23:42 2015 +0530
@@ -988,6 +988,12 @@
/* Enable stricter conditions to check bitrate deviations in CBR mode. May compromise
* quality to maintain bitrate adherence */
int bStrictCbr;
+
+ /* Enable adaptive quantization at CU granularity. This parameter specifies
+ * the minimum CU size at which QP can be adjusted, i.e. Quantization Group
+ * (QG) size. Allowed values are 64, 32, 16 provided it falls within the
+ * inclusive range [minCUSize, maxCUSize]. Experimental, default: maxCUSize */
+ uint32_t QGSize;
} rc;
/*== Video Usability Information ==*/
diff -r 335c728bbd62 -r d6e059bd8a9c source/x265cli.h
--- a/source/x265cli.h Fri Apr 03 14:27:32 2015 -0500
+++ b/source/x265cli.h Mon Mar 23 14:23:42 2015 +0530
@@ -205,6 +205,7 @@
{ "strict-cbr", no_argument, NULL, 0 },
{ "temporal-layers", no_argument, NULL, 0 },
{ "no-temporal-layers", no_argument, NULL, 0 },
+ { "qg-size", required_argument, NULL, 0 },
{ 0, 0, 0, 0 },
{ 0, 0, 0, 0 },
{ 0, 0, 0, 0 },
@@ -352,6 +353,7 @@
H0(" --analysis-file <filename> Specify file name used for either dumping or reading analysis data.\n");
H0(" --aq-mode <integer> Mode for Adaptive Quantization - 0:none 1:uniform AQ 2:auto variance. Default %d\n", param->rc.aqMode);
H0(" --aq-strength <float> Reduces blocking and blurring in flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength);
+ H0(" --qg-size <float> Specifies the size of the quantization group (64, 32, 16). Default %d\n", param->rc.QGSize);
H0(" --[no-]cutree Enable cutree for Adaptive Quantization. Default %s\n", OPT(param->rc.cuTree));
H1(" --ipratio <float> QP factor between I and P. Default %.2f\n", param->rc.ipFactor);
H1(" --pbratio <float> QP factor between P and B. Default %.2f\n", param->rc.pbFactor);
More information about the x265-devel
mailing list