[x265] [PATCH SCC 06/09] Fix run to run variation issue
Anusuya Kumarasamy
anusuya.kumarasamy at multicorewareinc.com
Wed Aug 7 17:22:25 UTC 2024
From 591add8daaaafdccb2fcbd09c29147b016f01424 Mon Sep 17 00:00:00 2001
From: AnusuyaKumarasamy <anusuya.kumarasamy at multicorewareinc.com>
Date: Thu, 25 Jul 2024 16:49:08 +0530
Subject: [PATCH 6/9] Fix run to run variation issue
---
source/common/cudata.cpp | 8 ++-
source/common/cudata.h | 11 +++-
source/common/frame.cpp | 3 +-
source/encoder/analysis.cpp | 102 +++++++++++++++---------------------
source/encoder/analysis.h | 7 +--
source/encoder/search.cpp | 52 +++++++++---------
source/encoder/search.h | 14 +++--
7 files changed, 96 insertions(+), 101 deletions(-)
diff --git a/source/common/cudata.cpp b/source/common/cudata.cpp
index 23f95288e..395b80fcd 100644
--- a/source/common/cudata.cpp
+++ b/source/common/cudata.cpp
@@ -325,7 +325,7 @@ void CUData::initCTU(const Frame& frame, uint32_t
cuAddr, int qp, uint32_t first
}
// initialize Sub partition
-void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp)
+void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp, MV
lastIntraBCMv[2])
{
m_absIdxInCTU = cuGeom.absPartIdx;
m_encData = ctu.m_encData;
@@ -362,6 +362,12 @@ void CUData::initSubCU(const CUData& ctu, const
CUGeom& cuGeom, int qp)
/* initialize the remaining CU data in one memset */
memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ?
BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
+
+ if (lastIntraBCMv)
+ {
+ for (int i = 0; i < 2; i++)
+ m_lastIntraBCMv[i] = lastIntraBCMv[i];
+ }
}
/* Copy the results of a sub-part (split) CU to the parent CU */
diff --git a/source/common/cudata.h b/source/common/cudata.h
index 362df5786..69cfb4947 100644
--- a/source/common/cudata.h
+++ b/source/common/cudata.h
@@ -37,6 +37,7 @@ class FrameData;
class Slice;
struct TUEntropyCodingParameters;
struct CUDataMemPool;
+struct IBC;
enum PartSize
{
@@ -120,6 +121,14 @@ struct InterNeighbourMV
union { int16_t refIdx[2]; int32_t unifiedRef; };
};
+struct IBC
+{
+ int m_numBVs;
+ int m_numBV16s;
+ MV m_BVs[64];
+ MV m_lastIntraBCMv[2];
+};
+
typedef void(*cucopy_t)(uint8_t* dst, uint8_t* src); // dst and src are
aligned to MIN(size, 32)
typedef void(*cubcast_t)(uint8_t* dst, uint8_t val); // dst is aligned to
MIN(size, 32)
@@ -240,7 +249,7 @@ public:
static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight,
uint32_t maxCUSize, uint32_t minCUSize, CUGeom
cuDataArray[CUGeom::MAX_GEOMS]);
void initCTU(const Frame& frame, uint32_t cuAddr, int qp, uint32_t
firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCUInSlice);
- void initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp);
+ void initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp, MV
lastIntraBCMv[2] = 0);
void initLosslessCU(const CUData& cu, const CUGeom& cuGeom);
void copyPartFrom(const CUData& cu, const CUGeom& childGeom,
uint32_t subPartIdx);
diff --git a/source/common/frame.cpp b/source/common/frame.cpp
index 6140021aa..b561a67b1 100644
--- a/source/common/frame.cpp
+++ b/source/common/frame.cpp
@@ -315,8 +315,7 @@ void Frame::destroy()
X265_FREE(m_isSubSampled);
}
- int numVersion = !!m_param->bEnableSCC ? 2 : 1;
- for (int i = 0; i < numVersion; i++)
+ for (int i = 0; i < !!m_param->bEnableSCC + 1; i++)
{
if (m_reconPic[i])
{
diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
index 789069c4c..033c781a9 100644
--- a/source/encoder/analysis.cpp
+++ b/source/encoder/analysis.cpp
@@ -223,6 +223,9 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame,
const CUGeom& cuGeom, con
}
ProfileCUScope(ctu, totalCTUTime, totalCTUs);
+ memset(m_ibc.m_BVs, 0, sizeof(m_ibc.m_BVs));
+ memset(m_ibc.m_lastIntraBCMv, 0, sizeof(m_ibc.m_lastIntraBCMv));
+ m_ibc.m_numBV16s = 0; m_ibc.m_numBVs = 0;
if (m_slice->m_sliceType == I_SLICE || (m_param->bEnableSCC &&
(m_slice->m_numRefIdx[0] == 1) && m_slice->m_refPOCList[0][0] ==
m_slice->m_poc))
{
x265_analysis_intra_data* intraDataCTU =
m_frame->m_analysisData.intraData;
@@ -233,7 +236,7 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame,
const CUGeom& cuGeom, con
memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr *
numPartition], sizeof(char) * numPartition);
memcpy(ctu.m_chromaIntraDir,
&intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) *
numPartition);
}
- compressIntraCU(ctu, cuGeom, qp);
+ compressIntraCU(ctu, cuGeom, qp, m_ibc);
}
else
{
@@ -266,7 +269,7 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame,
const CUGeom& cuGeom, con
if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
ctu.m_cuPelX / m_param->maxCUSize >=
frame.m_encData->m_pir.pirStartCol
&& ctu.m_cuPelX / m_param->maxCUSize <
frame.m_encData->m_pir.pirEndCol)
- compressIntraCU(ctu, cuGeom, qp);
+ compressIntraCU(ctu, cuGeom, qp, m_ibc);
else if (!m_param->rdLevel)
{
/* In RD Level 0/1, copy source pixels into the reconstructed
block so
@@ -304,7 +307,7 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame,
const CUGeom& cuGeom, con
else if (m_param->rdLevel <= 4)
compressInterCU_rd0_4(ctu, cuGeom, qp);
else
- compressInterCU_rd5_6(ctu, cuGeom, qp);
+ compressInterCU_rd5_6(ctu, cuGeom, qp, m_ibc);
}
if (m_param->bEnableRdRefine || m_param->bOptCUDeltaQP)
@@ -511,21 +514,12 @@ void Analysis::qprdRefine(const CUData& parentCTU,
const CUGeom& cuGeom, int32_t
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic[0],
parentCTU.m_cuAddr, cuGeom.absPartIdx);
}
-uint64_t Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp)
+uint64_t Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp, IBC& ibc)
{
uint32_t depth = cuGeom.depth;
ModeDepth& md = m_modeDepth[depth];
md.bestMode = NULL;
- MV lastIntraBCMv[2];
- for (int i = 0; i < 2; i++)
- {
- if (depth == 0)
- lastIntraBCMv[i] = parentCTU.m_lastIntraBCMv[i];
- else
- lastIntraBCMv[i] = tempLastIntraBCMv[i];
- }
-
MV iMVCandList[4][10];
memset(iMVCandList, 0, sizeof(MV) * 4 * 10);
@@ -584,13 +578,13 @@ uint64_t Analysis::compressIntraCU(const CUData&
parentCTU, const CUGeom& cuGeom
double intracost = MAX_DOUBLE;
if (m_param->bEnableSCC)
{
- md.pred[PRED_MERGE_IBC].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_MERGE_IBC].cu.initSubCU(parentCTU, cuGeom, qp,
ibc.m_lastIntraBCMv);
checkRDCostIntraBCMerge2Nx2N(md.pred[PRED_MERGE_IBC], cuGeom);
if (!bSkipIntraBlockCopySearch)
{
- md.pred[PRED_IBC_2Nx2N].cu.initSubCU(parentCTU, cuGeom,
qp);
- checkIntraBC_rd5_6(md.pred[PRED_IBC_2Nx2N], cuGeom,
SIZE_2Nx2N, false, bUse1DSearchFor8x8, tempLastIntraBCMv, &MV(0, 0));
+ md.pred[PRED_IBC_2Nx2N].cu.initSubCU(parentCTU, cuGeom,
qp, ibc.m_lastIntraBCMv);
+ checkIntraBC_rd5_6(md.pred[PRED_IBC_2Nx2N], cuGeom,
SIZE_2Nx2N, false, bUse1DSearchFor8x8, &MV(0, 0), ibc);
checkBestMode(md.pred[PRED_IBC_2Nx2N], depth);
if (intraBlockCopyFastSearch)
@@ -602,13 +596,13 @@ uint64_t Analysis::compressIntraCU(const CUData&
parentCTU, const CUGeom& cuGeom
double dTH3 = max(66 * m_rdCost.m_lambda, 800.0);
- md.pred[PRED_IBC_Nx2N].cu.initSubCU(parentCTU,
cuGeom, qp);
- checkIntraBC_rd5_6(md.pred[PRED_IBC_Nx2N], cuGeom,
SIZE_Nx2N, false, bUse1DSearchFor8x8, tempLastIntraBCMv, &MV(0, 0));
+ md.pred[PRED_IBC_Nx2N].cu.initSubCU(parentCTU,
cuGeom, qp, ibc.m_lastIntraBCMv);
+ checkIntraBC_rd5_6(md.pred[PRED_IBC_Nx2N], cuGeom,
SIZE_Nx2N, false, bUse1DSearchFor8x8, &MV(0, 0), ibc);
checkBestMode(md.pred[PRED_IBC_Nx2N], depth);
intracost = min(intracost,
md.pred[PRED_IBC_Nx2N].rdCost);
- md.pred[PRED_IBC_2NxN].cu.initSubCU(parentCTU,
cuGeom, qp);
- checkIntraBC_rd5_6(md.pred[PRED_IBC_2NxN], cuGeom,
SIZE_2NxN, false, bUse1DSearchFor8x8, tempLastIntraBCMv, &MV(0, 0));
+ md.pred[PRED_IBC_2NxN].cu.initSubCU(parentCTU,
cuGeom, qp, ibc.m_lastIntraBCMv);
+ checkIntraBC_rd5_6(md.pred[PRED_IBC_2NxN], cuGeom,
SIZE_2NxN, false, bUse1DSearchFor8x8, &MV(0, 0), ibc);
checkBestMode(md.pred[PRED_IBC_2NxN], depth);
intracost = min(intracost,
md.pred[PRED_IBC_2NxN].rdCost);
}
@@ -616,11 +610,11 @@ uint64_t Analysis::compressIntraCU(const CUData&
parentCTU, const CUGeom& cuGeom
else
{
md.pred[PRED_IBC_2NxN].cu.initSubCU(parentCTU, cuGeom,
qp);
- checkIntraBC_rd5_6(md.pred[PRED_IBC_2NxN], cuGeom,
SIZE_2NxN, false, bUse1DSearchFor8x8, tempLastIntraBCMv, &MV(0, 0));
+ checkIntraBC_rd5_6(md.pred[PRED_IBC_2NxN], cuGeom,
SIZE_2NxN, false, bUse1DSearchFor8x8, &MV(0, 0), ibc);
checkBestMode(md.pred[PRED_IBC_2NxN], depth);
md.pred[PRED_IBC_Nx2N].cu.initSubCU(parentCTU, cuGeom,
qp);
- checkIntraBC_rd5_6(md.pred[PRED_IBC_Nx2N], cuGeom,
SIZE_Nx2N, false, bUse1DSearchFor8x8, tempLastIntraBCMv, &MV(0, 0));
+ checkIntraBC_rd5_6(md.pred[PRED_IBC_Nx2N], cuGeom,
SIZE_Nx2N, false, bUse1DSearchFor8x8, &MV(0, 0), ibc);
checkBestMode(md.pred[PRED_IBC_Nx2N], depth);
}
}
@@ -746,12 +740,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
parentCTU, const CUGeom& cuGeom
if (m_slice->m_pps->bUseDQP && nextDepth <=
m_slice->m_pps->maxCuDQPDepth)
nextQP = setLambdaFromQP(parentCTU,
calculateQpforCuSize(parentCTU, childGeom));
- for (int i = 0; i < 2; i++)
- tempLastIntraBCMv[i] = lastIntraBCMv[i];
-
if (m_param->bEnableSplitRdSkip)
{
- curCost += compressIntraCU(parentCTU, childGeom,
nextQP);
+ curCost += compressIntraCU(parentCTU, childGeom,
nextQP, ibc);
if (m_modeDepth[depth].bestMode && curCost >
m_modeDepth[depth].bestMode->rdCost)
{
skipSplitCheck = 1;
@@ -759,13 +750,13 @@ uint64_t Analysis::compressIntraCU(const CUData&
parentCTU, const CUGeom& cuGeom
}
}
else
- compressIntraCU(parentCTU, childGeom, nextQP);
+ compressIntraCU(parentCTU, childGeom, nextQP, ibc);
if (nd.bestMode->cu.m_lastIntraBCMv[0].x != 0 ||
nd.bestMode->cu.m_lastIntraBCMv[0].y != 0)
{
for (int i = 0; i < 2; i++)
{
- lastIntraBCMv[i] =
nd.bestMode->cu.m_lastIntraBCMv[i];
+ ibc.m_lastIntraBCMv[i] =
nd.bestMode->cu.m_lastIntraBCMv[i];
}
}
@@ -1303,7 +1294,7 @@ uint32_t Analysis::compressInterCU_dist(const CUData&
parentCTU, const CUGeom& c
SplitData Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const
CUGeom& cuGeom, int32_t qp)
{
if (parentCTU.m_vbvAffected && calculateQpforCuSize(parentCTU, cuGeom,
1))
- return compressInterCU_rd5_6(parentCTU, cuGeom, qp);
+ return compressInterCU_rd5_6(parentCTU, cuGeom, qp, m_ibc);
uint32_t depth = cuGeom.depth;
uint32_t cuAddr = parentCTU.m_cuAddr;
@@ -2004,7 +1995,7 @@ SplitData Analysis::compressInterCU_rd0_4(const
CUData& parentCTU, const CUGeom&
return splitCUData;
}
-SplitData Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const
CUGeom& cuGeom, int32_t qp)
+SplitData Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const
CUGeom& cuGeom, int32_t qp, IBC& ibc)
{
if (parentCTU.m_vbvAffected && !calculateQpforCuSize(parentCTU,
cuGeom, 1))
return compressInterCU_rd0_4(parentCTU, cuGeom, qp);
@@ -2014,14 +2005,6 @@ SplitData Analysis::compressInterCU_rd5_6(const
CUData& parentCTU, const CUGeom&
md.bestMode = NULL;
Mode* interBest; // store the best modes in inter prediction
- MV lastIntraBCMv[2];
- for (int i = 0; i < 2; i++)
- {
- if (depth == 0)
- lastIntraBCMv[i] = parentCTU.m_lastIntraBCMv[i];
- else
- lastIntraBCMv[i] = tempLastIntraBCMv[i];
- }
MV iMVCandList[4][10];
memset(iMVCandList, 0, sizeof(MV) * 4 * 10);
@@ -2196,7 +2179,7 @@ SplitData Analysis::compressInterCU_rd5_6(const
CUData& parentCTU, const CUGeom&
interBest = md.bestMode;
if (m_param->bEnableSCC)
{
- md.pred[PRED_MERGE_IBC].cu.initSubCU(parentCTU, cuGeom,
qp);
+ md.pred[PRED_MERGE_IBC].cu.initSubCU(parentCTU, cuGeom,
qp, ibc.m_lastIntraBCMv);
checkRDCostIntraBCMerge2Nx2N(md.pred[PRED_MERGE_IBC],
cuGeom);
}
@@ -2236,15 +2219,12 @@ SplitData Analysis::compressInterCU_rd5_6(const
CUData& parentCTU, const CUGeom&
if (m_slice->m_pps->bUseDQP && nextDepth <=
m_slice->m_pps->maxCuDQPDepth)
nextQP = setLambdaFromQP(parentCTU,
calculateQpforCuSize(parentCTU, childGeom));
- for (int i = 0; i < 2; i++)
- tempLastIntraBCMv[i] = lastIntraBCMv[i];
-
- splitData[subPartIdx] =
compressInterCU_rd5_6(parentCTU, childGeom, nextQP);
+ splitData[subPartIdx] =
compressInterCU_rd5_6(parentCTU, childGeom, nextQP, ibc);
if (nd.bestMode->cu.m_lastIntraBCMv[0].x != 0 ||
nd.bestMode->cu.m_lastIntraBCMv[0].y != 0)
{
for (int i = 0; i < 2; i++)
- lastIntraBCMv[i] =
nd.bestMode->cu.m_lastIntraBCMv[i];
+ ibc.m_lastIntraBCMv[i] =
nd.bestMode->cu.m_lastIntraBCMv[i];
}
// Save best CU and pred data for this sub CU
@@ -2488,8 +2468,8 @@ SplitData Analysis::compressInterCU_rd5_6(const
CUData& parentCTU, const CUGeom&
if (!bSkipIntraBlockCopySearch)
{
- md.pred[PRED_IBC_2Nx2N].cu.initSubCU(parentCTU,
cuGeom, qp);
- checkIntraBC_rd5_6(md.pred[PRED_IBC_2Nx2N],
cuGeom, SIZE_2Nx2N, false, bUse1DSearchFor8x8, tempLastIntraBCMv, &MV(0,
0));
+ md.pred[PRED_IBC_2Nx2N].cu.initSubCU(parentCTU,
cuGeom, qp, ibc.m_lastIntraBCMv);
+ checkIntraBC_rd5_6(md.pred[PRED_IBC_2Nx2N],
cuGeom, SIZE_2Nx2N, false, bUse1DSearchFor8x8, &MV(0, 0), ibc);
checkBestMode(md.pred[PRED_IBC_2Nx2N], depth);
if (intraBlockCopyFastSearch)
@@ -2500,12 +2480,12 @@ SplitData Analysis::compressInterCU_rd5_6(const
CUData& parentCTU, const CUGeom&
double dTH2 = max(60 * m_rdCost.m_lambda,
56.0);
double dTH3 = max(66 * m_rdCost.m_lambda,
800.0);
-
md.pred[PRED_IBC_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkIntraBC_rd5_6(md.pred[PRED_IBC_Nx2N],
cuGeom, SIZE_Nx2N, false, bUse1DSearchFor8x8, tempLastIntraBCMv,
(iMVCandList[SIZE_Nx2N] + 8));
+
md.pred[PRED_IBC_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp,
ibc.m_lastIntraBCMv);
+ checkIntraBC_rd5_6(md.pred[PRED_IBC_Nx2N],
cuGeom, SIZE_Nx2N, false, bUse1DSearchFor8x8, (iMVCandList[SIZE_Nx2N] + 8),
ibc);
checkBestMode(md.pred[PRED_IBC_Nx2N],
depth);
intracost = min(intracost,
md.pred[PRED_IBC_Nx2N].rdCost);
-
md.pred[PRED_MIXED_IBC_NX2N].cu.initSubCU(parentCTU, cuGeom, qp);
+
md.pred[PRED_MIXED_IBC_NX2N].cu.initSubCU(parentCTU, cuGeom, qp,
ibc.m_lastIntraBCMv);
bValid =
predMixedIntraBCInterSearch(md.pred[PRED_MIXED_IBC_NX2N], cuGeom, m_csp !=
X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400, SIZE_Nx2N,
false, iMVCandList[SIZE_Nx2N]);
if (bValid)
encodeResAndCalcRdInterCU(md.pred[PRED_MIXED_IBC_NX2N], cuGeom);
@@ -2513,12 +2493,12 @@ SplitData Analysis::compressInterCU_rd5_6(const
CUData& parentCTU, const CUGeom&
md.pred[PRED_MIXED_IBC_NX2N].rdCost =
UINT64_MAX;
checkBestMode(md.pred[PRED_MIXED_IBC_NX2N], depth);
-
md.pred[PRED_IBC_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
- checkIntraBC_rd5_6(md.pred[PRED_IBC_2NxN],
cuGeom, SIZE_2NxN, false, bUse1DSearchFor8x8, tempLastIntraBCMv,
(iMVCandList[SIZE_2NxN] + 8));
+
md.pred[PRED_IBC_2NxN].cu.initSubCU(parentCTU, cuGeom, qp,
ibc.m_lastIntraBCMv);
+ checkIntraBC_rd5_6(md.pred[PRED_IBC_2NxN],
cuGeom, SIZE_2NxN, false, bUse1DSearchFor8x8, (iMVCandList[SIZE_2NxN] + 8),
ibc);
checkBestMode(md.pred[PRED_IBC_2NxN],
depth);
intracost = min(intracost,
md.pred[PRED_IBC_2NxN].rdCost);
-
md.pred[PRED_MIXED_IBC_2NXN].cu.initSubCU(parentCTU, cuGeom, qp);
+
md.pred[PRED_MIXED_IBC_2NXN].cu.initSubCU(parentCTU, cuGeom, qp,
ibc.m_lastIntraBCMv);
bValid =
predMixedIntraBCInterSearch(md.pred[PRED_MIXED_IBC_2NXN], cuGeom, m_csp !=
X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400, SIZE_2NxN,
false, iMVCandList[SIZE_2NxN]);
if (bValid)
encodeResAndCalcRdInterCU(md.pred[PRED_MIXED_IBC_2NXN], cuGeom);
@@ -2530,11 +2510,11 @@ SplitData Analysis::compressInterCU_rd5_6(const
CUData& parentCTU, const CUGeom&
else // full search
{
md.pred[PRED_IBC_2NxN].cu.initSubCU(parentCTU,
cuGeom, qp);
- checkIntraBC_rd5_6(md.pred[PRED_IBC_2NxN],
cuGeom, SIZE_2NxN, false, bUse1DSearchFor8x8, tempLastIntraBCMv, &MV(0, 0));
+ checkIntraBC_rd5_6(md.pred[PRED_IBC_2NxN],
cuGeom, SIZE_2NxN, false, bUse1DSearchFor8x8, &MV(0, 0), ibc);
checkBestMode(md.pred[PRED_IBC_2NxN], depth);
md.pred[PRED_IBC_Nx2N].cu.initSubCU(parentCTU,
cuGeom, qp);
- checkIntraBC_rd5_6(md.pred[PRED_IBC_Nx2N],
cuGeom, SIZE_Nx2N, false, bUse1DSearchFor8x8, tempLastIntraBCMv, &MV(0, 0));
+ checkIntraBC_rd5_6(md.pred[PRED_IBC_Nx2N],
cuGeom, SIZE_Nx2N, false, bUse1DSearchFor8x8, &MV(0, 0), ibc);
checkBestMode(md.pred[PRED_IBC_Nx2N], depth);
}
}
@@ -2794,7 +2774,7 @@ void Analysis::recodeCU(const CUData& parentCTU,
const CUGeom& cuGeom, int32_t q
if (parentCTU.isIntra(cuGeom.absPartIdx) && m_refineLevel < 2)
{
if (m_param->intraRefine == 4)
- compressIntraCU(parentCTU, cuGeom, qp);
+ compressIntraCU(parentCTU, cuGeom, qp, m_ibc);
else
{
bool reuseModes = !((m_param->intraRefine == 3) ||
@@ -2913,7 +2893,7 @@ void Analysis::recodeCU(const CUData& parentCTU,
const CUGeom& cuGeom, int32_t q
if (parentCTU.m_skipFlag[list][cuGeom.absPartIdx] == 1 &&
cuGeom.numPartitions <= 16)
m_checkMergeAndSkipOnly[list] = true;
}
- m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU,
cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp);
+ m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU,
cuGeom, qp, m_ibc) : compressInterCU_rd0_4(parentCTU, cuGeom, qp);
for (int list = 0; list < m_slice->isInterB() + 1; list++)
{
m_modeFlag[list] = false;
@@ -2930,7 +2910,7 @@ void Analysis::recodeCU(const CUData& parentCTU,
const CUGeom& cuGeom, int32_t q
m_evaluateInter = 1;
else
bDecidedDepth = true;
- m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU,
cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp);
+ m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU,
cuGeom, qp, m_ibc) : compressInterCU_rd0_4(parentCTU, cuGeom, qp);
m_evaluateInter = 0;
}
}
@@ -2963,7 +2943,7 @@ void Analysis::recodeCU(const CUData& parentCTU,
const CUGeom& cuGeom, int32_t q
int lamdaQP = (m_param->analysisLoadReuseLevel >= 7) ?
nextQP : lqp;
if (split)
- m_param->rdLevel > 4 ?
compressInterCU_rd5_6(parentCTU, childGeom, nextQP) :
compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
+ m_param->rdLevel > 4 ?
compressInterCU_rd5_6(parentCTU, childGeom, nextQP, m_ibc) :
compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
else
qprdRefine(parentCTU, childGeom, nextQP, lamdaQP);
@@ -3554,7 +3534,7 @@ void Analysis::checkInter_rd5_6(Mode& interMode,
const CUGeom& cuGeom, PartSize
}
}
-void Analysis::checkIntraBC_rd5_6(Mode& intraBCMode, const CUGeom& cuGeom,
PartSize ePartSize, bool testOnlyPred, bool bUse1DSearchFor8x8, MV
lastIntraBCMv[2], MV* iMVCandList)
+void Analysis::checkIntraBC_rd5_6(Mode& intraBCMode, const CUGeom& cuGeom,
PartSize ePartSize, bool testOnlyPred, bool bUse1DSearchFor8x8, MV*
iMVCandList, IBC& ibc)
{
intraBCMode.initCosts();
intraBCMode.cu.setPartSizeSubParts(ePartSize);
@@ -3562,9 +3542,9 @@ void Analysis::checkIntraBC_rd5_6(Mode& intraBCMode,
const CUGeom& cuGeom, PartS
intraBCMode.cu.setLumaIntraDirSubParts(DC_IDX, 0, cuGeom.depth);
intraBCMode.cu.setChromIntraDirSubParts(DC_IDX, 0, cuGeom.depth);
for (int i = 0; i < 2; i++)
- intraBCMode.cu.m_lastIntraBCMv[i] = lastIntraBCMv[i];
+ intraBCMode.cu.m_lastIntraBCMv[i] = ibc.m_lastIntraBCMv[i];
- bool bValid = predIntraBCSearch(intraBCMode, cuGeom, m_csp !=
X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400, ePartSize,
testOnlyPred, bUse1DSearchFor8x8);
+ bool bValid = predIntraBCSearch(intraBCMode, cuGeom, m_csp !=
X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400, ePartSize,
testOnlyPred, bUse1DSearchFor8x8, ibc);
if (bValid)
encodeResAndCalcRdInterCU(intraBCMode, cuGeom);
else
diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h
index 6f2e4d931..e672d3554 100644
--- a/source/encoder/analysis.h
+++ b/source/encoder/analysis.h
@@ -119,6 +119,7 @@ public:
bool m_modeFlag[2];
bool m_checkMergeAndSkipOnly[2];
+ IBC m_ibc;
Analysis();
bool create(ThreadLocalData* tld);
@@ -170,12 +171,12 @@ protected:
void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom, int32_t
qp, int32_t lqp);
/* full analysis for an I-slice CU */
- uint64_t compressIntraCU(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp);
+ uint64_t compressIntraCU(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp, IBC &ibc);
/* full analysis for a P or B slice CU */
uint32_t compressInterCU_dist(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp);
SplitData compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp);
- SplitData compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp);
+ SplitData compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp, IBC& ibc);
void recodeCU(const CUData& parentCTU, const CUGeom& cuGeom, int32_t
qp, int32_t origqp = -1);
@@ -190,7 +191,7 @@ protected:
void checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom&
cuGeom);
void checkRDCostIntraBCMerge2Nx2N(Mode& merge, const CUGeom& cuGeom);
- void checkIntraBC_rd5_6(Mode& intraBCMode, const CUGeom& cuGeom,
PartSize ePartSize, bool testOnlyPred, bool bUse1DSearchFor8x8, MV
lastIntraBCMv[2], MV* iMVCandList = (0, 0));
+ void checkIntraBC_rd5_6(Mode& intraBCMode, const CUGeom& cuGeom,
PartSize ePartSize, bool testOnlyPred, bool bUse1DSearchFor8x8, MV*
iMVCandList ,IBC& ibc);
/* encode current bestMode losslessly, pick best RD cost */
void tryLossless(const CUGeom& cuGeom);
diff --git a/source/encoder/search.cpp b/source/encoder/search.cpp
index 3a5c54ffa..f72ea8c9c 100644
--- a/source/encoder/search.cpp
+++ b/source/encoder/search.cpp
@@ -172,6 +172,9 @@ bool Search::initSearch(const x265_param& param,
ScalingList& scalingList)
CHECKED_MALLOC(m_tsResidual, int16_t, MAX_TS_SIZE * MAX_TS_SIZE);
CHECKED_MALLOC(m_tsRecon, pixel, MAX_TS_SIZE * MAX_TS_SIZE);
+ m_numBVs = 0;
+ m_numBV16s = 0;
+
return ok;
fail:
@@ -2771,15 +2774,15 @@ int Search::intraBCSearchMVChromaRefine(Mode&
intraBCMode,
return bestCandIdx;
}
-void Search::updateBVMergeCandLists(int roiWidth, int roiHeight, MV*
mvCand)
+void Search::updateBVMergeCandLists(int roiWidth, int roiHeight, MV*
mvCand, IBC& ibc)
{
if (roiWidth + roiHeight > 8)
{
- m_numBVs = mergeCandLists(m_BVs, m_numBVs, mvCand,
CHROMA_REFINEMENT_CANDIDATES, false);
+ ibc.m_numBVs = mergeCandLists(ibc.m_BVs, ibc.m_numBVs, mvCand,
CHROMA_REFINEMENT_CANDIDATES, false);
if (roiWidth + roiHeight == 32)
{
- m_numBV16s = m_numBVs;
+ ibc.m_numBV16s = ibc.m_numBVs;
}
}
}
@@ -2954,7 +2957,7 @@ bool Search::isValidIntraBCSearchArea(CUData* cu, int
predX, int predY, int roiW
}
void Search::intraPatternSearch(Mode& intraBCMode, const CUGeom& cuGeom,
int puIdx, uint32_t partAddr, pixel* refY, int refStride, MV*
searchRangeLT, MV* searchRangeRB,
- MV& mv, uint32_t& cost, int roiWidth, int roiHeight, bool
testOnlyPred, bool bUse1DSearchFor8x8)
+ MV& mv, uint32_t& cost, int roiWidth, int roiHeight, bool
testOnlyPred, bool bUse1DSearchFor8x8, IBC& ibc)
{
const int srchRngHorLeft = searchRangeLT->x;
const int srchRngHorRight = searchRangeRB->x;
@@ -3021,22 +3024,21 @@ void Search::intraPatternSearch(Mode& intraBCMode,
const CUGeom& cuGeom, int puI
}
if (roiWidth > 8 || roiHeight > 8)
- m_numBVs = 0;
+ ibc.m_numBVs = 0;
else if (roiWidth + roiHeight == 16)
- m_numBVs = m_numBV16s;
+ ibc.m_numBVs = ibc.m_numBV16s;
if (testOnlyPred)
- m_numBVs = 0;
+ ibc.m_numBVs = 0;
MV mvPredEncOnly[16];
int nbPreds = 0;
-
cu.getIntraBCMVPsEncOnly(partAddr, mvPredEncOnly, nbPreds, puIdx);
- m_numBVs = mergeCandLists(m_BVs, m_numBVs, mvPredEncOnly, nbPreds,
true);
+ ibc.m_numBVs = mergeCandLists(ibc.m_BVs, ibc.m_numBVs,
mvPredEncOnly, nbPreds, true);
- for (uint32_t cand = 0; cand < m_numBVs; cand++)
+ for (uint32_t cand = 0; cand < ibc.m_numBVs; cand++)
{
- int xPred = m_BVs[cand].x >> 2;
- int yPred = m_BVs[cand].y >> 2;
+ int xPred = ibc.m_BVs[cand].x >> 2;
+ int yPred = ibc.m_BVs[cand].y >> 2;
if (!(xPred == 0 && yPred == 0) && !((yPred < srTop) || (yPred
> srBottom)) && !((xPred < srLeft) || (xPred > srRight)))
{
int tempY = yPred + relCUPelY + roiHeight - 1;
@@ -3058,7 +3060,7 @@ void Search::intraPatternSearch(Mode& intraBCMode,
const CUGeom& cuGeom, int puI
if (validCand)
{
- sad = m_me.mvcost(m_BVs[cand]);
+ sad = m_me.mvcost(ibc.m_BVs[cand]);
refSrch = refY + yPred * refStride + xPred;
@@ -3118,7 +3120,7 @@ void Search::intraPatternSearch(Mode& intraBCMode,
const CUGeom& cuGeom, int puI
mv.set(bestX, bestY);
cost = sadBest;
- updateBVMergeCandLists(roiWidth, roiHeight, MVCand);
+ updateBVMergeCandLists(roiWidth, roiHeight, MVCand, ibc);
return;
}
}
@@ -3155,7 +3157,7 @@ void Search::intraPatternSearch(Mode& intraBCMode,
const CUGeom& cuGeom, int puI
mv.set(bestX, bestY);
cost = sadBest;
- updateBVMergeCandLists(roiWidth, roiHeight, MVCand);
+ updateBVMergeCandLists(roiWidth, roiHeight, MVCand, ibc);
return;
}
}
@@ -3174,7 +3176,7 @@ void Search::intraPatternSearch(Mode& intraBCMode,
const CUGeom& cuGeom, int puI
mv.set(bestX, bestY);
cost = sadBest;
- updateBVMergeCandLists(roiWidth, roiHeight, MVCand);
+ updateBVMergeCandLists(roiWidth, roiHeight, MVCand, ibc);
return;
}
@@ -3238,7 +3240,7 @@ void Search::intraPatternSearch(Mode& intraBCMode,
const CUGeom& cuGeom, int puI
mv.set(bestX, bestY);
cost = sadBest;
- updateBVMergeCandLists(roiWidth, roiHeight, MVCand);
+ updateBVMergeCandLists(roiWidth, roiHeight, MVCand, ibc);
return;
}
@@ -3299,7 +3301,7 @@ void Search::intraPatternSearch(Mode& intraBCMode,
const CUGeom& cuGeom, int puI
mv.set(bestX, bestY);
cost = sadBest;
- updateBVMergeCandLists(roiWidth, roiHeight,
MVCand);
+ updateBVMergeCandLists(roiWidth, roiHeight,
MVCand, ibc);
return;
}
}
@@ -3319,7 +3321,7 @@ void Search::intraPatternSearch(Mode& intraBCMode,
const CUGeom& cuGeom, int puI
mv.set(bestX, bestY);
cost = sadBest;
- updateBVMergeCandLists(roiWidth, roiHeight, MVCand);
+ updateBVMergeCandLists(roiWidth, roiHeight, MVCand, ibc);
return;
}
@@ -3383,7 +3385,7 @@ void Search::intraPatternSearch(Mode& intraBCMode,
const CUGeom& cuGeom, int puI
mv.set(bestX, bestY);
cost = sadBest;
- updateBVMergeCandLists(roiWidth, roiHeight,
MVCand);
+ updateBVMergeCandLists(roiWidth, roiHeight,
MVCand, ibc);
return;
}
}
@@ -3453,7 +3455,7 @@ void Search::intraPatternSearch(Mode& intraBCMode,
const CUGeom& cuGeom, int puI
mv.set(bestX, bestY);
cost = sadBest;
- updateBVMergeCandLists(roiWidth, roiHeight, MVCand);
+ updateBVMergeCandLists(roiWidth, roiHeight, MVCand, ibc);
}
@@ -3518,7 +3520,7 @@ void Search::setIntraSearchRange(Mode& intraBCMode,
MV& pred, int puIdx, int roi
}
-void Search::intraBlockCopyEstimate(Mode& intraBCMode, const CUGeom&
cuGeom, int puIdx, MV* pred, MV& mv, uint32_t& cost, bool testOnlyPred,
bool bUse1DSearchFor8x8)
+void Search::intraBlockCopyEstimate(Mode& intraBCMode, const CUGeom&
cuGeom, int puIdx, MV* pred, MV& mv, uint32_t& cost, bool testOnlyPred,
bool bUse1DSearchFor8x8, IBC& ibc)
{
uint32_t partAddr;
int roiWidth;
@@ -3558,10 +3560,10 @@ void Search::intraBlockCopyEstimate(Mode&
intraBCMode, const CUGeom& cuGeom, int
m_me.setMVP(predictors);
- intraPatternSearch(intraBCMode, cuGeom, puIdx, partAddr, refY,
strideY, &searchRangeLT, &searchRangeRB, mv, cost, roiWidth, roiHeight,
testOnlyPred, bUse1DSearchFor8x8);
+ intraPatternSearch(intraBCMode, cuGeom, puIdx, partAddr, refY,
strideY, &searchRangeLT, &searchRangeRB, mv, cost, roiWidth, roiHeight,
testOnlyPred, bUse1DSearchFor8x8, ibc);
}
-bool Search::predIntraBCSearch(Mode& intraBCMode, const CUGeom& cuGeom,
bool bChromaMC, PartSize ePartSize, bool testOnlyPred, bool
bUse1DSearchFor8x8)
+bool Search::predIntraBCSearch(Mode& intraBCMode, const CUGeom& cuGeom,
bool bChromaMC, PartSize ePartSize, bool testOnlyPred, bool
bUse1DSearchFor8x8, IBC& ibc)
{
MV zeroMv(0, 0);
CUData& cu = intraBCMode.cu;
@@ -3596,7 +3598,7 @@ bool Search::predIntraBCSearch(Mode& intraBCMode,
const CUGeom& cuGeom, bool bCh
MVField mvField;
uint32_t cost;
mv.set(0, 0);
- intraBlockCopyEstimate(intraBCMode, cuGeom, puIdx, mvPred, mv,
cost, testOnlyPred, bUse1DSearchFor8x8);
+ intraBlockCopyEstimate(intraBCMode, cuGeom, puIdx, mvPred, mv,
cost, testOnlyPred, bUse1DSearchFor8x8, ibc);
bestME->mv.set(mv.x << 2, mv.y << 2);
bestME->cost = cost;
diff --git a/source/encoder/search.h b/source/encoder/search.h
index e05d6ef95..948ea1883 100644
--- a/source/encoder/search.h
+++ b/source/encoder/search.h
@@ -289,12 +289,10 @@ public:
bool m_vertRestriction;
int m_ibcEnabled;
-
- int m_numBVs = 0;
- int m_numBV16s = 0;
+ int m_numBVs;
+ int m_numBV16s;
MV m_BVs[64];
uint32_t m_lastCandCost;
- MV tempLastIntraBCMv[2] = { 0,0 };
#if DETAILED_CU_STATS
/* Accumulate CU statistics separately for each frame encoder */
CUStats m_stats[X265_MAX_FRAME_THREADS];
@@ -338,16 +336,16 @@ public:
MV getLowresMV(const CUData& cu, const PredictionUnit& pu, int list,
int ref);
- bool predIntraBCSearch(Mode& intraBCMode, const CUGeom& cuGeom,
bool bChromaMC, PartSize ePartSize, bool testOnlyPred, bool
bUse1DSearchFor8x8);
- void intraBlockCopyEstimate(Mode& intraBCMode, const CUGeom&
cuGeom, int puIdx, MV* pred, MV& mv, uint32_t& cost, bool testOnlyPred,
bool bUse1DSearchFor8x8);
+ bool predIntraBCSearch(Mode& intraBCMode, const CUGeom& cuGeom,
bool bChromaMC, PartSize ePartSize, bool testOnlyPred, bool
bUse1DSearchFor8x8, IBC& ibc);
+ void intraBlockCopyEstimate(Mode& intraBCMode, const CUGeom&
cuGeom, int puIdx, MV* pred, MV& mv, uint32_t& cost, bool testOnlyPred,
bool bUse1DSearchFor8x8, IBC& ibc);
void setIntraSearchRange(Mode& intraBCMode, MV& pred, int puIdx,
int roiWidth, int roiHeight, MV& searchRangeLT, MV& searchRangeRB);
void intraPatternSearch(Mode& intraBCMode, const CUGeom& cuGeom,
int puIdx, uint32_t partAddr, pixel* refY, int refStride, MV*
searchRangeLT, MV* searchRangeRB,
- MV& mv, uint32_t& cost, int roiwidth, int roiheight, bool
testOnlyPred, bool bUse1DSearchFor8x8);
+ MV& mv, uint32_t& cost, int roiwidth, int roiheight, bool
testOnlyPred, bool bUse1DSearchFor8x8, IBC& ibc);
bool isValidIntraBCSearchArea(CUData* cu, int predX, int predY,
int roiWidth, int roiHeight, int partOffset);
bool isBlockVectorValid(int xPos, int yPos, int width, int
height, CUData* pcCU,
int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize);
void intraBCSearchMVCandUpdate(uint32_t sad, int x, int y,
uint32_t* sadBestCand, MV* cMVCand);
- void updateBVMergeCandLists(int roiWidth, int roiHeight, MV*
mvCand);
+ void updateBVMergeCandLists(int roiWidth, int roiHeight, MV*
mvCand, IBC& ibc);
int intraBCSearchMVChromaRefine(Mode& intraBCMode, const CUGeom&
cuGeom, int roiWidth, int roiHeight, int cuPelX, int cuPelY, uint32_t*
sadBestCand, MV* cMVCand,
uint32_t partOffset, int puIdx);
static uint32_t mergeCandLists(MV* dst, uint32_t dn, MV* src,
uint32_t sn, bool isSrcQuarPel);
--
2.36.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240807/15c765be/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0006-Fix-run-to-run-variation-issue.patch
Type: application/octet-stream
Size: 36403 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240807/15c765be/attachment-0001.obj>
More information about the x265-devel mailing list