[x265] [Patch] fix: corrects output mismatch for cutree enabled analysis save/load encodes with reuse-levels between 1 and 10 for similar encoder settings.
Srikanth Kurapati
srikanth.kurapati at multicorewareinc.com
Mon Jan 11 14:38:30 UTC 2021
>From d516d0564888e154d88d89320302725d87bfab78 Mon Sep 17 00:00:00 2001
From: Srikanth Kurapati <srikanth.kurapati at multicorewareinc.com>
Date: Wed, 30 Dec 2020 17:00:08 +0530
Subject: [PATCH] fix: corrects output mismatch for cutree enabled analysis
save/load encodes with reuse-levels between 1 and 10 for similar encoder
settings.
---
source/abrEncApp.cpp | 14 +++-
source/common/common.h | 3 +-
source/common/cudata.h | 2 +-
source/encoder/analysis.cpp | 31 ++++++++-
source/encoder/analysis.h | 1 +
source/encoder/api.cpp | 28 +++++++-
source/encoder/encoder.cpp | 123 ++++++++++++++++++++++++++---------
source/encoder/slicetype.cpp | 2 +-
source/x265.h | 4 +-
9 files changed, 166 insertions(+), 42 deletions(-)
diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
index fa62ebf63..ea255e3f6 100644
--- a/source/abrEncApp.cpp
+++ b/source/abrEncApp.cpp
@@ -340,7 +340,12 @@ namespace X265_NS {
memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char)
* src->depthBytes);
memcpy(intraDst->chromaModes, intraSrc->chromaModes,
sizeof(uint8_t) * src->depthBytes);
if (m_param->rc.cuTree)
- memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
sizeof(int8_t) * src->depthBytes);
+ {
+ if (m_param->analysisSaveReuseLevel == 10)
+ memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
sizeof(int8_t) * src->depthBytes);
+ else
+ memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS));
+ }
}
else
{
@@ -355,7 +360,12 @@ namespace X265_NS {
memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
src->depthBytes);
memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
src->depthBytes);
if (m_param->rc.cuTree)
- memcpy(interDst->cuQPOff, interSrc->cuQPOff,
sizeof(int8_t) * src->depthBytes);
+ {
+ if (m_param->analysisReuseLevel == 10)
+ memcpy(interDst->cuQPOff, interSrc->cuQPOff,
sizeof(int8_t) * src->depthBytes);
+ else
+ memcpy(interDst->cuQPOff, interSrc->cuQPOff,
sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS));
+ }
if (m_param->analysisSaveReuseLevel > 4)
{
memcpy(interDst->partSize, interSrc->partSize,
sizeof(uint8_t) * src->depthBytes);
diff --git a/source/common/common.h b/source/common/common.h
index 8c06cd79e..0ffbf17eb 100644
--- a/source/common/common.h
+++ b/source/common/common.h
@@ -326,7 +326,8 @@ typedef int16_t coeff_t; // transform coefficient
#define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422)
#define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
-#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
+#define MAX_NUM_CU_GEOMS 85
+#define X265_MAX_PRED_MODE_PER_CTU MAX_NUM_CU_GEOMS * 2 * 8
#define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE // Maximum
number of transform coefficients, for a 32x32 transform
#define MAX_NUM_TR_CATEGORIES 16 // 32, 16,
8, 4 transform categories each for luma and chroma
diff --git a/source/common/cudata.h b/source/common/cudata.h
index 8397f0568..c7d9a1972 100644
--- a/source/common/cudata.h
+++ b/source/common/cudata.h
@@ -371,7 +371,7 @@ struct CUDataMemPool
CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) *
numInstances);
}
else
- {
+ {
uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) +
CHROMA_V_SHIFT(csp));
CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) *
numInstances);
}
diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
index aabf386ca..22a4ba74f 100644
--- a/source/encoder/analysis.cpp
+++ b/source/encoder/analysis.cpp
@@ -220,6 +220,9 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame,
const CUGeom& cuGeom, con
if (m_param->analysisSave && !m_param->analysisLoad)
for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir;
i++)
m_reuseRef[i] = -1;
+
+ if (m_param->rc.cuTree)
+ m_reuseQP = &m_reuseInterDataCTU->cuQPOff[ctu.m_cuAddr *
MAX_NUM_CU_GEOMS];
}
ProfileCUScope(ctu, totalCTUTime, totalCTUs);
@@ -233,6 +236,8 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame,
const CUGeom& cuGeom, con
memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr *
numPartition], sizeof(char) * numPartition);
memcpy(ctu.m_chromaIntraDir,
&intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) *
numPartition);
}
+ if (m_param->rc.cuTree && reuseLevel > 1 && reuseLevel < 10)
+ m_reuseQP = &intraDataCTU->cuQPOff[ctu.m_cuAddr *
MAX_NUM_CU_GEOMS];
compressIntraCU(ctu, cuGeom, qp);
}
else
@@ -520,6 +525,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
parentCTU, const CUGeom& cuGeom
bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
m_param->analysisSaveReuseLevel < 10)
+ m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
+
bool bAlreadyDecided = m_param->intraRefine != 4 &&
parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX &&
!(m_param->bAnalysisType == HEVC_INFO);
bool bDecidedDepth = m_param->intraRefine != 4 &&
parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
int split = 0;
@@ -870,6 +878,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData&
parentCTU, const CUGeom& c
uint32_t minDepth = m_param->rdLevel <= 4 ? topSkipMinDepth(parentCTU,
cuGeom) : 0;
uint32_t splitRefs[4] = { 0, 0, 0, 0 };
+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
m_param->analysisSaveReuseLevel < 10)
+ m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
+
X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
support RD 0 or 1\n");
PMODE pmode(*this, cuGeom);
@@ -1152,6 +1163,8 @@ SplitData Analysis::compressInterCU_rd0_4(const
CUData& parentCTU, const CUGeom&
uint32_t cuAddr = parentCTU.m_cuAddr;
ModeDepth& md = m_modeDepth[depth];
+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
m_param->analysisSaveReuseLevel < 10)
+ m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
if (m_param->searchMethod == X265_SEA)
{
@@ -1856,6 +1869,9 @@ SplitData Analysis::compressInterCU_rd5_6(const
CUData& parentCTU, const CUGeom&
ModeDepth& md = m_modeDepth[depth];
md.bestMode = NULL;
+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
m_param->analysisSaveReuseLevel < 10)
+ m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
+
if (m_param->searchMethod == X265_SEA)
{
int numPredDir = m_slice->isInterP() ? 1 : 2;
@@ -3647,11 +3663,20 @@ int Analysis::calculateQpforCuSize(const CUData&
ctu, const CUGeom& cuGeom, int3
if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)
{
- int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
cuGeom.absPartIdx;
+ int cuIdx;
+ int8_t cuQPOffSet = 0;
+
+ if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel
== 10)
+ cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
cuGeom.absPartIdx;
+ else
+ cuIdx = (ctu.m_cuAddr * MAX_NUM_CU_GEOMS) + cuGeom.geomRecurId;
+
if (ctu.m_slice->m_sliceType == I_SLICE)
- return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 +
((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
+ cuQPOffSet =
((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx];
else
- return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 +
((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
+ cuQPOffSet =
((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx];
+
+ return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 + cuQPOffSet));
}
if (m_param->rc.hevcAq)
{
diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h
index 3bcb56bc3..8d76d5c5e 100644
--- a/source/encoder/analysis.h
+++ b/source/encoder/analysis.h
@@ -126,6 +126,7 @@ protected:
int32_t* m_reuseRef;
uint8_t* m_reuseDepth;
uint8_t* m_reuseModes;
+ int8_t * m_reuseQP; // array of QP values for
analysis reuse at reuse levels > 1 and < 10 when cutree is enabled
uint8_t* m_reusePartSize;
uint8_t* m_reuseMergeFlag;
x265_analysis_MV* m_reuseMv[2];
diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
index a986355e0..2c90fe8f2 100644
--- a/source/encoder/api.cpp
+++ b/source/encoder/api.cpp
@@ -825,7 +825,16 @@ void x265_alloc_analysis_data(x265_param *param,
x265_analysis_data* analysis)
CHECKED_MALLOC_ZERO(intraData->partSizes, char,
analysis->numPartitions * analysis->numCUsInFrame);
CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
if (param->rc.cuTree)
- CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+ {
+ if (maxReuseLevel == 10)
+ {
+ CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+ }
+ else
+ {
+ CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);
+ }
+ }
}
analysis->intraData = intraData;
@@ -837,7 +846,16 @@ void x265_alloc_analysis_data(x265_param *param,
x265_analysis_data* analysis)
CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
if (param->rc.cuTree && !isMultiPassOpt)
- CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+ {
+ if (maxReuseLevel == 10)
+ {
+ CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+ }
+ else
+ {
+ CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);
+ }
+ }
CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
analysis->numPartitions * analysis->numCUsInFrame);
@@ -919,7 +937,9 @@ void x265_free_analysis_data(x265_param *param,
x265_analysis_data* analysis)
X265_FREE((analysis->intraData)->partSizes);
X265_FREE((analysis->intraData)->chromaModes);
if (param->rc.cuTree)
- X265_FREE((analysis->intraData)->cuQPOff);
+ {
+ X265_FREE_ZERO((analysis->intraData)->cuQPOff);
+ }
}
X265_FREE(analysis->intraData);
analysis->intraData = NULL;
@@ -931,7 +951,9 @@ void x265_free_analysis_data(x265_param *param,
x265_analysis_data* analysis)
X265_FREE((analysis->interData)->depth);
X265_FREE((analysis->interData)->modes);
if (!isMultiPassOpt && param->rc.cuTree)
+ {
X265_FREE((analysis->interData)->cuQPOff);
+ }
X265_FREE((analysis->interData)->mvpIdx[0]);
X265_FREE((analysis->interData)->mvpIdx[1]);
X265_FREE((analysis->interData)->mv[0]);
diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
index 1f710e1ce..5eb123d31 100644
--- a/source/encoder/encoder.cpp
+++ b/source/encoder/encoder.cpp
@@ -4444,6 +4444,26 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
}
}
}
+
+ int8_t *cuQPBuf = NULL, *cuQPOffSets = NULL;
+ uint32_t reuseBufSize = 0;
+
+ if (m_param->rc.cuTree)
+ {
+ if (m_param->analysisLoadReuseLevel == 10)
+ reuseBufSize = depthBytes;
+ else if (m_param->analysisLoadReuseLevel > 1)
+ reuseBufSize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;
+ cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);
+ if (!m_param->bUseAnalysisFile)
+ {
+ if (analysis->sliceType == X265_TYPE_IDR ||
analysis->sliceType == X265_TYPE_I)
+ cuQPOffSets = intraPic->cuQPOff;
+ else
+ cuQPOffSets = interPic->cuQPOff;
+ }
+ }
+
if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
X265_TYPE_I)
{
if (m_param->bAnalysisType == HEVC_INFO)
@@ -4452,19 +4472,21 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
return;
uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
*partSizes = NULL;
- int8_t *cuQPBuf = NULL;
tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
depthBuf = tempBuf;
modeBuf = tempBuf + depthBytes;
partSizes = tempBuf + 2 * depthBytes;
- if (m_param->rc.cuTree)
- cuQPBuf = X265_MALLOC(int8_t, depthBytes);
X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->depth);
X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
intraPic->chromaModes);
X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->partSizes);
- if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
+ if (m_param->rc.cuTree)
+ {
+ X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
m_analysisFileIn, cuQPOffSets);
+ if (m_param->analysisLoadReuseLevel > 1 &&
m_param->analysisLoadReuseLevel < 10)
+ memcpy(analysis->intraData->cuQPOff, cuQPBuf,
sizeof(int8_t) * reuseBufSize);
+ }
size_t count = 0;
for (uint32_t d = 0; d < depthBytes; d++)
@@ -4480,7 +4502,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
memset(&(analysis->intraData)->depth[count], depthBuf[d],
bytes);
memset(&(analysis->intraData)->chromaModes[count], modeBuf[d],
bytes);
memset(&(analysis->intraData)->partSizes[count], partSizes[d],
bytes);
- if (m_param->rc.cuTree)
+ if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel ==
10)
memset(&(analysis->intraData)->cuQPOff[count], cuQPBuf[d],
bytes);
count += bytes;
}
@@ -4515,7 +4537,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
MV* mv[2];
int8_t* refIdx[2];
- int8_t* cuQPBuf = NULL;
int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
bool bIntraInInter = false;
@@ -4535,12 +4556,15 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
depthBuf = tempBuf;
modeBuf = tempBuf + depthBytes;
- if (m_param->rc.cuTree)
- cuQPBuf = X265_MALLOC(int8_t, depthBytes);
X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, interPic->depth);
X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, interPic->modes);
- if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, interPic->cuQPOff); }
+ if (m_param->rc.cuTree)
+ {
+ X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
m_analysisFileIn, cuQPOffSets);
+ if (m_param->analysisLoadReuseLevel > 1 &&
m_param->analysisLoadReuseLevel < 10)
+ memcpy(analysis->interData->cuQPOff, cuQPBuf,
sizeof(int8_t) * reuseBufSize);
+ }
if (m_param->analysisLoadReuseLevel > 4)
{
@@ -4578,7 +4602,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
depthBuf[d] = 1;
memset(&(analysis->interData)->depth[count], depthBuf[d],
bytes);
memset(&(analysis->interData)->modes[count], modeBuf[d],
bytes);
- if (m_param->rc.cuTree)
+ if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel
== 10)
memset(&(analysis->interData)->cuQPOff[count],
cuQPBuf[d], bytes);
if (m_param->analysisLoadReuseLevel > 4)
{
@@ -4736,7 +4760,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
int numPartitions = analysis->numPartitions;
int numCUsInFrame = analysis->numCUsInFrame;
int numCuInHeight = analysis->numCuInHeight;
- /* Allocate memory for scaled resoultion's numPartitions and
numCUsInFrame*/
+ /* Allocate memory for scaled resolution's numPartitions and
numCUsInFrame */
analysis->numPartitions = m_param->num4x4Partitions;
analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
analysis->numCuInHeight = cuLoc.heightInCU;
@@ -4808,25 +4832,40 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
X265_FREE(vbvCostBuf);
}
+ uint32_t reuseBufSize = 0;
+ int8_t *cuQPOffSets = NULL, *cuQPBuf = NULL;
+ if (m_param->rc.cuTree)
+ {
+ if (m_param->analysisLoadReuseLevel == 10)
+ reuseBufSize = depthBytes;
+ else if (m_param->analysisLoadReuseLevel > 1)
+ reuseBufSize = (MAX_NUM_CU_GEOMS / factor) *
(analysis->numCUsInFrame);
+ cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);
+ if (!m_param->bUseAnalysisFile)
+ {
+ if (analysis->sliceType == X265_TYPE_IDR ||
analysis->sliceType == X265_TYPE_I)
+ cuQPOffSets = intraPic->cuQPOff;
+ else
+ cuQPOffSets = interPic->cuQPOff;
+ }
+ }
+
if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
X265_TYPE_I)
{
if (m_param->analysisLoadReuseLevel < 2)
return;
uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
*partSizes = NULL;
- int8_t *cuQPBuf = NULL;
tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
depthBuf = tempBuf;
modeBuf = tempBuf + depthBytes;
partSizes = tempBuf + 2 * depthBytes;
- if (m_param->rc.cuTree)
- cuQPBuf = X265_MALLOC(int8_t, depthBytes);
X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->depth);
X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
intraPic->chromaModes);
X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->partSizes);
- if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
+ if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
reuseBufSize, m_analysisFileIn, cuQPOffSets); }
uint32_t count = 0;
for (uint32_t d = 0; d < depthBytes; d++)
@@ -4848,7 +4887,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
memset(&(analysis->intraData)->depth[count], depthBuf[d],
bytes);
memset(&(analysis->intraData)->chromaModes[count],
modeBuf[d], bytes);
memset(&(analysis->intraData)->partSizes[count],
partSizes[d], bytes);
- if (m_param->rc.cuTree)
+ if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel
== 10)
memset(&(analysis->intraData)->cuQPOff[count],
cuQPBuf[d], bytes);
count += bytes;
d += getCUIndex(&cuLoc, &count, bytes, 1);
@@ -4886,7 +4925,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
MV* mv[2];
int8_t* refIdx[2];
- int8_t* cuQPBuf = NULL;
int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
bool bIntraInInter = false;
@@ -4900,12 +4938,16 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
depthBuf = tempBuf;
modeBuf = tempBuf + depthBytes;
- if (m_param->rc.cuTree)
- cuQPBuf = X265_MALLOC(int8_t, depthBytes);
X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, interPic->depth);
X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
interPic->modes);
- if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, interPic->cuQPOff); }
+ if (m_param->rc.cuTree)
+ {
+ X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
m_analysisFileIn, cuQPOffSets);
+ if (m_param->analysisLoadReuseLevel > 1 &&
m_param->analysisLoadReuseLevel < 10)
+ memcpy(&(analysis->interData)->cuQPOff, cuQPBuf,
sizeof(int8_t) * reuseBufSize);
+ }
+
if (m_param->analysisLoadReuseLevel > 4)
{
partSize = modeBuf + depthBytes;
@@ -4954,7 +4996,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
{
memset(&(analysis->interData)->depth[count], writeDepth,
bytes);
memset(&(analysis->interData)->modes[count], modeBuf[d],
bytes);
- if (m_param->rc.cuTree)
+ if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel
== 10)
memset(&(analysis->interData)->cuQPOff[count],
cuQPBuf[d], bytes);
if (m_param->analysisLoadReuseLevel == 10 && bIntraInInter)
memset(&(analysis->intraData)->chromaModes[count],
chromaDir[d], bytes);
@@ -5046,7 +5088,9 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
}
}
else
+ {
X265_FREAD((analysis->interData)->ref, sizeof(int32_t),
analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir,
m_analysisFileIn, interPic->ref);
+ }
consumedBytes += frameRecordSize;
if (numDir == 1)
@@ -5510,9 +5554,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
analysis->frameRecordSize += analysis->numCUsInFrame *
sizeof(sse_t);
}
+ uint32_t reuseQPBufsize = 0;
if (m_param->analysisSaveReuseLevel > 1)
{
-
+ reuseQPBufsize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;
if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
X265_TYPE_I)
{
for (uint32_t cuAddr = 0; cuAddr < analysis->numCUsInFrame;
cuAddr++)
@@ -5536,12 +5581,21 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
partSize = ctu->m_partSize[absPartIdx];
intraDataCTU->partSizes[depthBytes] = partSize;
- if (m_param->rc.cuTree)
+ if (m_param->rc.cuTree &&
m_param->analysisSaveReuseLevel == 10)
intraDataCTU->cuQPOff[depthBytes] =
(int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
absPartIdx += ctu->m_numPartitions >> (depth * 2);
}
+
+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel
< 10)
+ {
+ uint32_t nextCuIdx = (cuAddr + 1) * MAX_NUM_CU_GEOMS;
+ for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i <
nextCuIdx; i++)
+ intraDataCTU->cuQPOff[i] =
(int8_t)(intraDataCTU->cuQPOff[i] - baseQP);
+ }
memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
ctu->m_numPartitions);
}
+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel ==
10)
+ reuseQPBufsize = depthBytes;
}
else
{
@@ -5567,7 +5621,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
predMode = 4; // used as indicator if the block is
coded as bidir
interDataCTU->modes[depthBytes] = predMode;
- if (m_param->rc.cuTree)
+ if (m_param->rc.cuTree &&
m_param->analysisSaveReuseLevel == 10)
interDataCTU->cuQPOff[depthBytes] =
(int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
if (m_param->analysisSaveReuseLevel > 4)
@@ -5599,13 +5653,23 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
}
absPartIdx += ctu->m_numPartitions >> (depth * 2);
}
+
+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel
< 10)
+ {
+ uint32_t nextCuIdx = (cuAddr + 1) * MAX_NUM_CU_GEOMS;
+ for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i <
nextCuIdx ; i++)
+ interDataCTU->cuQPOff[i] =
(int8_t)(interDataCTU->cuQPOff[i] - baseQP);
+ }
+
if (m_param->analysisSaveReuseLevel == 10 && bIntraInInter)
memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
ctu->m_numPartitions);
}
+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel ==
10)
+ reuseQPBufsize = depthBytes;
}
if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
== X265_TYPE_I) && m_param->rc.cuTree)
- analysis->frameRecordSize += sizeof(uint8_t)*
analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
(sizeof(int8_t) * depthBytes);
+ analysis->frameRecordSize += sizeof(uint8_t)*
analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
(sizeof(int8_t) * reuseQPBufsize);
else if (analysis->sliceType == X265_TYPE_IDR ||
analysis->sliceType == X265_TYPE_I)
analysis->frameRecordSize += sizeof(uint8_t)*
analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
else
@@ -5613,7 +5677,8 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
/* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag */
analysis->frameRecordSize += depthBytes * 2;
if (m_param->rc.cuTree)
- analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
+ analysis->frameRecordSize += (sizeof(int8_t) *
reuseQPBufsize);
+
if (m_param->analysisSaveReuseLevel > 4)
analysis->frameRecordSize += (depthBytes * 2);
@@ -5669,7 +5734,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
depthBytes, m_analysisFileOut);
if (m_param->rc.cuTree)
- X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
depthBytes, m_analysisFileOut);
+ X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
reuseQPBufsize, m_analysisFileOut);
X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
}
else
@@ -5677,7 +5742,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
if (m_param->rc.cuTree)
- X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
depthBytes, m_analysisFileOut);
+ X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
reuseQPBufsize, m_analysisFileOut);
if (m_param->analysisSaveReuseLevel > 4)
{
X265_FWRITE((analysis->interData)->partSize, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
@@ -5762,7 +5827,7 @@ void
Encoder::writeAnalysisFileRefine(x265_analysis_data* analysis, FrameData &c
interData->mv[1][depthBytes].word =
ctu->m_mv[1][absPartIdx].word;
interData->mvpIdx[1][depthBytes] =
ctu->m_mvpIdx[1][absPartIdx];
ref[1][depthBytes] = ctu->m_refIdx[1][absPartIdx];
- predMode = 4; // used as indiacator if the block is
coded as bidir
+ predMode = 4; // used as indicator if the block is
coded as bidir
}
interData->modes[depthBytes] = predMode;
diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
index 0adb0d0db..3bc01268b 100644
--- a/source/encoder/slicetype.cpp
+++ b/source/encoder/slicetype.cpp
@@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
bool bKeyframe)
if (!framecnt)
{
- if (m_param->rc.cuTree)
+ if (m_param->rc.cuTree && !m_param->analysisLoad)
cuTree(frames, 0, bKeyframe);
return;
}
diff --git a/source/x265.h b/source/x265.h
index f44040ba7..8d7a75826 100644
--- a/source/x265.h
+++ b/source/x265.h
@@ -144,7 +144,7 @@ typedef struct x265_analysis_intra_data
uint8_t* modes;
char* partSizes;
uint8_t* chromaModes;
- int8_t* cuQPOff;
+ int8_t* cuQPOff;
}x265_analysis_intra_data;
typedef struct x265_analysis_MV
@@ -167,7 +167,7 @@ typedef struct x265_analysis_inter_data
uint8_t* interDir;
uint8_t* mvpIdx[2];
int8_t* refIdx[2];
- x265_analysis_MV* mv[2];
+ x265_analysis_MV* mv[2];
int64_t* sadCost;
int8_t* cuQPOff;
}x265_analysis_inter_data;
--
2.20.1.windows.1
--
*With Regards,*
*Srikanth Kurapati.*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20210111/7ae4cbea/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-fix-corrects-output-mismatch-for-cutree-enabled-anal.patch
Type: application/octet-stream
Size: 30754 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20210111/7ae4cbea/attachment-0001.obj>
More information about the x265-devel
mailing list