[x265] [PATCH] SAO: modified sao rdo cost calculation
ashok at multicorewareinc.com
ashok at multicorewareinc.com
Wed Apr 6 10:59:43 CEST 2016
# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1459933165 -19800
# Wed Apr 06 14:29:25 2016 +0530
# Node ID a747b7c2d77bdc2037020019d99a8d3c09d532c8
# Parent 511241d3ee7d6d53a999d46881e7921a601fa0e9
SAO: modified sao rdo cost calculation
The SAO type, EO class and BO class are all fixed-length syntax elements, so there is
no need to reload the entropy coder context repeatedly.
diff -r 511241d3ee7d -r a747b7c2d77b source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Wed Apr 06 13:12:17 2016 +0530
+++ b/source/encoder/entropy.cpp Wed Apr 06 14:29:25 2016 +0530
@@ -1021,42 +1021,14 @@
}
}
-void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
+void Entropy::codeSaoTypeBits(int type, int plane)
{
- if (plane != 2)
- {
- encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
- encodeBinEP(1);
- }
+ if (plane == 2)
+ return;
- enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
-
- codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
- codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
- codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
- codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
- if (plane != 2)
- encodeBinsEP((uint32_t)(typeIdx), 2);
-}
-
-void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
-{
- if (plane != 2)
- {
- encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
- encodeBinEP(0);
- }
-
- enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
-
- for (int i = 0; i < SAO_NUM_OFFSET; i++)
- codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
-
- for (int i = 0; i < SAO_NUM_OFFSET; i++)
- if (offset[i] != 0)
- encodeBinEP(offset[i] < 0);
-
- encodeBinsEP(bandPos, 5);
+ encodeBin(!!type, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
+ if (type)
+ encodeBinEP(type - 1);
}
/** initialize context model with respect to QP and initialization value */
diff -r 511241d3ee7d -r a747b7c2d77b source/encoder/entropy.h
--- a/source/encoder/entropy.h Wed Apr 06 13:12:17 2016 +0530
+++ b/source/encoder/entropy.h Wed Apr 06 14:29:25 2016 +0530
@@ -169,7 +169,6 @@
void codeCoeffNxN(const CUData& cu, const coeff_t* coef, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
inline void codeSaoMerge(uint32_t code) { encodeBin(code, m_contextState[OFF_SAO_MERGE_FLAG_CTX]); }
- inline void codeSaoType(uint32_t code) { encodeBin(code, m_contextState[OFF_SAO_TYPE_IDX_CTX]); }
inline void codeMVPIdx(uint32_t symbol) { encodeBin(symbol, m_contextState[OFF_MVP_IDX_CTX]); }
inline void codeMergeFlag(const CUData& cu, uint32_t absPartIdx) { encodeBin(cu.m_mergeFlag[absPartIdx], m_contextState[OFF_MERGE_FLAG_EXT_CTX]); }
inline void codeSkipFlag(const CUData& cu, uint32_t absPartIdx) { encodeBin(cu.isSkipped(absPartIdx), m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); }
@@ -183,8 +182,7 @@
inline void codeTransformSkipFlags(uint32_t transformSkip, TextType ttype) { encodeBin(transformSkip, m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX + (ttype ? NUM_TRANSFORMSKIP_FLAG_CTX : 0)]); }
void codeDeltaQP(const CUData& cu, uint32_t absPartIdx);
void codeSaoOffset(const SaoCtuParam& ctuParam, int plane);
- void codeSaoOffsetEO(int *offset, int typeIdx, int plane);
- void codeSaoOffsetBO(int *offset, int bandPos, int plane);
+ void codeSaoTypeBits(int type, int plane);
/* RDO functions */
void estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const;
diff -r 511241d3ee7d -r a747b7c2d77b source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Wed Apr 06 13:12:17 2016 +0530
+++ b/source/encoder/sao.cpp Wed Apr 06 14:29:25 2016 +0530
@@ -1247,13 +1247,11 @@
m_entropyCoder.codeSaoMerge(0);
m_entropyCoder.store(m_rdContexts.temp);
- int64_t mergeDist[NUM_MERGE_MODE] = { 0 };
int64_t bestCost = 0;
-
// Estimate distortion and cost of new SAO params
- saoLumaComponentParamDist(saoParam, addr, mergeDist, lambda, bestCost);
+ saoLumaComponentParamDist(saoParam, addr, lambda, bestCost);
if (chroma)
- saoChromaComponentParamDist(saoParam, addr, mergeDist, lambda, bestCost);
+ saoChromaComponentParamDist(saoParam, addr, lambda, bestCost);
if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
{
@@ -1263,9 +1261,18 @@
if (!allowMerge[mergeIdx])
continue;
+ m_entropyCoder.load(m_rdContexts.cur);
+ m_entropyCoder.resetBits();
+ if (allowMerge[0])
+ m_entropyCoder.codeSaoMerge(1 - mergeIdx);
+ if (allowMerge[1] && (mergeIdx == 1))
+ m_entropyCoder.codeSaoMerge(1);
+
+ int32_t rate = m_entropyCoder.getNumberOfWrittenBits();
+
+ int64_t estDist = 0;
for (int plane = 0; plane < 3; plane++)
{
- int64_t estDist = 0;
SaoCtuParam* mergeSrcParam = &(saoParam->ctuParam[plane][addrMerge[mergeIdx]]);
int typeIdx = mergeSrcParam->typeIdx;
if (typeIdx >= 0)
@@ -1277,18 +1284,11 @@
estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos]);
}
}
- mergeDist[mergeIdx + 1] += (estDist / (lambda[!!plane] >> 8));
}
- m_entropyCoder.load(m_rdContexts.cur);
- m_entropyCoder.resetBits();
- if (allowMerge[0])
- m_entropyCoder.codeSaoMerge(1 - mergeIdx);
- if (allowMerge[1] && (mergeIdx == 1))
- m_entropyCoder.codeSaoMerge(1);
+ // Considering only luma lambda
+ int64_t mergeCost = calcSaoRdoCost(estDist, rate, lambda[0]);
- uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
- int64_t mergeCost = mergeDist[mergeIdx + 1] + rate;
// Compare merge cost with best offset cost
if (mergeCost < bestCost)
{
@@ -1372,7 +1372,7 @@
}
}
-inline int64_t SAO::calcSaoRdoCost(int64_t distortion, uint32_t bits, int64_t lambda)
+inline int64_t SAO::calcSaoRdoCost(int64_t distortion, int32_t bits, int64_t lambda)
{
#if X265_DEPTH < 10
X265_CHECK(bits <= (INT64_MAX - 128) / lambda,
@@ -1386,10 +1386,9 @@
return distortion + ((bits * lambda + 128) >> 8);
}
-void SAO::estIterOffset(int typeIdx, int64_t lambda, int32_t count, int32_t offsetOrg, int32_t& offset, int32_t& distClasses, int64_t& costClasses)
+void SAO::estIterOffset(int32_t typeIdx, int64_t lambda, int32_t count, int32_t offsetOrg, int32_t& offset, int64_t& costClasses)
{
int bestOffset = 0;
- distClasses = 0;
// Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit.
// entropy coder can be used to measure the exact rate here.
@@ -1408,7 +1407,6 @@
{
bestCost = cost;
bestOffset = offset;
- distClasses = (int)dist;
}
offset = (offset > 0) ? (offset - 1) : (offset + 1);
}
@@ -1417,49 +1415,48 @@
offset = bestOffset;
}
-void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int32_t addr, int64_t* mergeDist, int64_t* lambda, int64_t &bestCost)
+void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int32_t addr, int64_t* lambda, int64_t &bestCost)
{
- int64_t bestDist = 0;
int bestTypeIdx = -1;
SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
-
- int32_t distClasses[MAX_NUM_SAO_CLASS];
int64_t costClasses[MAX_NUM_SAO_CLASS];
// RDO SAO_NA
m_entropyCoder.load(m_rdContexts.temp);
m_entropyCoder.resetBits();
- m_entropyCoder.codeSaoType(0);
- uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
+ m_entropyCoder.codeSaoTypeBits(0, 0);
+ int32_t rate = m_entropyCoder.getNumberOfWrittenBits();
int64_t costPartBest = calcSaoRdoCost(0, rate, lambda[0]);
// RDO SAO_EO
+ m_entropyCoder.load(m_rdContexts.temp);
+ m_entropyCoder.resetBits();
+ m_entropyCoder.codeSaoTypeBits(2, 0); // sao_type_idx_ctx for EO = 2.
+ int32_t eo_bits_type = m_entropyCoder.getNumberOfWrittenBits();
+ int64_t eo_type_cost = calcSaoRdoCost(0, eo_bits_type, lambda[0]);
+
for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
{
- int64_t estDist = 0;
+ int64_t cost = 0;
for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++)
{
int32_t& count = m_count[0][typeIdx][classIdx];
int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx];
int32_t& offsetOut = m_offset[0][typeIdx][classIdx];
- estIterOffset(typeIdx, lambda[0], count, offsetOrg, offsetOut, distClasses[classIdx], costClasses[classIdx]);
+ estIterOffset(typeIdx, lambda[0], count, offsetOrg, offsetOut, costClasses[classIdx]);
//Calculate distortion
- estDist += distClasses[classIdx];
+ cost += costClasses[classIdx];
}
- m_entropyCoder.load(m_rdContexts.temp);
- m_entropyCoder.resetBits();
- m_entropyCoder.codeSaoOffsetEO(m_offset[0][typeIdx] + 1, typeIdx, 0);
- uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
- int64_t cost = calcSaoRdoCost(estDist, rate, lambda[0]);
+ // sao_eo_class_luma and sao_eo_class_chroma are fixed-length 2-bit syntax elements.
+ cost += eo_type_cost + calcSaoRdoCost(0, 2, lambda[0]);
if (cost < costPartBest)
{
costPartBest = cost;
- bestDist = estDist;
bestTypeIdx = typeIdx;
}
}
@@ -1474,14 +1471,19 @@
}
// RDO SAO_BO
- int64_t estDist = 0;
+ m_entropyCoder.load(m_rdContexts.temp);
+ m_entropyCoder.resetBits();
+ m_entropyCoder.codeSaoTypeBits(1, 0); // sao_type_idx_ctx for BO = 1.
+ int32_t bo_bits_type = m_entropyCoder.getNumberOfWrittenBits();
+ int64_t bo_type_cost = calcSaoRdoCost(0, bo_bits_type, lambda[0]);
+
for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
{
int32_t& count = m_count[0][SAO_BO][classIdx];
int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx];
int32_t& offsetOut = m_offset[0][SAO_BO][classIdx];
- estIterOffset(SAO_BO, lambda[0], count, offsetOrg, offsetOut, distClasses[classIdx], costClasses[classIdx]);
+ estIterOffset(SAO_BO, lambda[0], count, offsetOrg, offsetOut, costClasses[classIdx]);
}
// Estimate Best Position
@@ -1501,21 +1503,12 @@
}
}
- estDist = 0;
- for (int classIdx = bestClassBO; classIdx < bestClassBO + SAO_NUM_OFFSET; classIdx++)
- estDist += distClasses[classIdx];
-
- // Estimate best BO cost
- m_entropyCoder.load(m_rdContexts.temp);
- m_entropyCoder.resetBits();
- m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + bestClassBO, bestClassBO, 0);
- uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
- int64_t cost = calcSaoRdoCost(estDist, estRate, lambda[0]);
+ // sao_band_position is a fixed-length 5-bit syntax element.
+ int64_t cost = bestRDCostBO + bo_type_cost + calcSaoRdoCost(0, 5, lambda[0]);
if (cost < costPartBest)
{
costPartBest = cost;
- bestDist = estDist;
lclCtuParam->mergeMode = SAO_MERGE_NONE;
lclCtuParam->typeIdx = SAO_BO;
@@ -1524,38 +1517,39 @@
lclCtuParam->offset[classIdx] = m_offset[0][SAO_BO][classIdx + bestClassBO];
}
- mergeDist[0] = bestDist / (lambda[0] >> 8);
-
m_entropyCoder.load(m_rdContexts.temp);
m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
m_entropyCoder.store(m_rdContexts.temp);
- uint32_t bits = m_entropyCoder.getNumberOfWrittenBits();
- bestCost = mergeDist[0] + bits;
+ bestCost = costPartBest;
}
-void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int32_t addr, int64_t* mergeDist, int64_t* lambda, int64_t &bestCost)
+void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int32_t addr, int64_t* lambda, int64_t &bestCost)
{
- int64_t bestDist = 0;
int bestTypeIdx = -1;
SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr], &saoParam->ctuParam[2][addr] };
int64_t costClasses[MAX_NUM_SAO_CLASS];
- int32_t distClasses[MAX_NUM_SAO_CLASS];
int32_t bestClassBO[2] = { 0, 0 };
// RDO SAO_NA
m_entropyCoder.load(m_rdContexts.temp);
m_entropyCoder.resetBits();
- m_entropyCoder.codeSaoType(0);
- uint32_t bits = m_entropyCoder.getNumberOfWrittenBits();
+ m_entropyCoder.codeSaoTypeBits(0, 0);
+ int32_t bits = m_entropyCoder.getNumberOfWrittenBits();
int64_t costPartBest = calcSaoRdoCost(0, bits, lambda[1]);
// RDO SAO_EO
+ m_entropyCoder.load(m_rdContexts.temp);
+ m_entropyCoder.resetBits();
+ m_entropyCoder.codeSaoTypeBits(2, 0); // sao_type_idx_ctx for EO = 2.
+ int32_t eo_bits_type = m_entropyCoder.getNumberOfWrittenBits();
+ int64_t eo_type_cost = calcSaoRdoCost(0, eo_bits_type, lambda[1]);
+
for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
{
- int64_t estDist[2] = {0, 0};
+ int64_t cost = 0;
for (int compIdx = 1; compIdx < 3; compIdx++)
{
for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++)
@@ -1564,25 +1558,18 @@
int32_t& offsetOrg = m_offsetOrg[compIdx][typeIdx][classIdx];
int32_t& offsetOut = m_offset[compIdx][typeIdx][classIdx];
- estIterOffset(typeIdx, lambda[1], count, offsetOrg, offsetOut, distClasses[classIdx], costClasses[classIdx]);
+ estIterOffset(typeIdx, lambda[1], count, offsetOrg, offsetOut, costClasses[classIdx]);
- estDist[compIdx - 1] += distClasses[classIdx];
+ cost += costClasses[classIdx];
}
}
- m_entropyCoder.load(m_rdContexts.temp);
- m_entropyCoder.resetBits();
-
- for (int compIdx = 0; compIdx < 2; compIdx++)
- m_entropyCoder.codeSaoOffsetEO(m_offset[compIdx + 1][typeIdx] + 1, typeIdx, compIdx + 1);
-
- uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
- int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]), estRate, lambda[1]);
+ // sao_eo_class_luma and sao_eo_class_chroma are fixed-length 2-bit syntax elements.
+ cost += eo_type_cost + calcSaoRdoCost(0, 2, lambda[1]);
if (cost < costPartBest)
{
costPartBest = cost;
- bestDist = (estDist[0] + estDist[1]);
bestTypeIdx = typeIdx;
}
}
@@ -1600,8 +1587,13 @@
}
// RDO SAO_BO
- int64_t estDist[2];
+ m_entropyCoder.load(m_rdContexts.temp);
+ m_entropyCoder.resetBits();
+ m_entropyCoder.codeSaoTypeBits(1, 0); // sao_type_idx_ctx for BO = 1.
+ int32_t bo_bits_type = m_entropyCoder.getNumberOfWrittenBits();
+ int64_t bo_type_cost = calcSaoRdoCost(0, bo_bits_type, lambda[1]);
+ int64_t cost = 0;
// Estimate Best Position
for (int compIdx = 1; compIdx < 3; compIdx++)
{
@@ -1613,7 +1605,7 @@
int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx];
int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx];
- estIterOffset(SAO_BO, lambda[1], count, offsetOrg, offsetOut, distClasses[classIdx], costClasses[classIdx]);
+ estIterOffset(SAO_BO, lambda[1], count, offsetOrg, offsetOut, costClasses[classIdx]);
}
for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++)
@@ -1629,24 +1621,15 @@
}
}
- estDist[compIdx - 1] = 0;
- for (int classIdx = bestClassBO[compIdx - 1]; classIdx < bestClassBO[compIdx - 1] + SAO_NUM_OFFSET; classIdx++)
- estDist[compIdx - 1] += distClasses[classIdx];
+ cost += bestRDCostBO;
}
- m_entropyCoder.load(m_rdContexts.temp);
- m_entropyCoder.resetBits();
-
- for (int compIdx = 0; compIdx < 2; compIdx++)
- m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] + bestClassBO[compIdx], bestClassBO[compIdx], compIdx + 1);
-
- uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
- int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]), estRate, lambda[1]);
+ // sao_band_position is a fixed-length 5-bit syntax element.
+ cost += bo_type_cost + calcSaoRdoCost(0, 5, lambda[1]);
if (cost < costPartBest)
{
costPartBest = cost;
- bestDist = (estDist[0] + estDist[1]);
for (int compIdx = 0; compIdx < 2; compIdx++)
{
@@ -1658,15 +1641,12 @@
}
}
- mergeDist[0] += (bestDist / (lambda[1] >> 8));
m_entropyCoder.load(m_rdContexts.temp);
-
m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1);
m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2);
m_entropyCoder.store(m_rdContexts.temp);
- uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
- bestCost = mergeDist[0] + rate;
+ bestCost += costPartBest;
}
// NOTE: must put in namespace X265_NS since we need class SAO
diff -r 511241d3ee7d -r a747b7c2d77b source/encoder/sao.h
--- a/source/encoder/sao.h Wed Apr 06 13:12:17 2016 +0530
+++ b/source/encoder/sao.h Wed Apr 06 14:29:25 2016 +0530
@@ -54,7 +54,6 @@
enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
enum { NUM_EDGETYPE = 5 };
enum { NUM_PLANE = 3 };
- enum { NUM_MERGE_MODE = 3 };
enum { SAO_DEPTHRATE_SIZE = 4 };
static const uint32_t s_eoTable[NUM_EDGETYPE];
@@ -127,14 +126,13 @@
void calcSaoStatsCu(int addr, int plane);
void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
- void saoLumaComponentParamDist(SAOParam* saoParam, int addr, int64_t* mergeDist, int64_t* lambda, int64_t &bestCost);
- void saoChromaComponentParamDist(SAOParam* saoParam, int addr, int64_t* mergeDist, int64_t* lambda, int64_t &bestCost);
+ void saoLumaComponentParamDist(SAOParam* saoParam, int addr, int64_t* lambda, int64_t &bestCost);
+ void saoChromaComponentParamDist(SAOParam* saoParam, int addr, int64_t* lambda, int64_t &bestCost);
- void estIterOffset(int typeIdx, int64_t lambda, int32_t count, int32_t offsetOrg, int32_t& offset,
- int32_t& distClasses, int64_t& costClasses);
+ void estIterOffset(int32_t typeIdx, int64_t lambda, int32_t count, int32_t offsetOrg, int32_t& offset, int64_t& costClasses);
void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int addr);
- int64_t calcSaoRdoCost(int64_t distortion, uint32_t bits, int64_t lambda);
+ int64_t calcSaoRdoCost(int64_t distortion, int32_t bits, int64_t lambda);
void saoStatsInitialOffset(bool chroma);
More information about the x265-devel
mailing list