[x265] [PATCH] SAO: removed estSaoTypeDist() function and made EO and BO distortion calculation separate
ashok at multicorewareinc.com
ashok at multicorewareinc.com
Mon Jan 18 16:50:14 CET 2016
# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1453132196 -19800
# Mon Jan 18 21:19:56 2016 +0530
# Node ID 5ba1062898a0965690e2ddf28fd193321f493542
# Parent 6b430aa3ef0035e14b9e494606c1c59523335ac7
SAO: removed estSaoTypeDist() function and made EO and BO distortion calculation separate
The purpose is to avoid the repeated typeIdx condition checks that the shared function performed for every class.
diff -r 6b430aa3ef00 -r 5ba1062898a0 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Thu Jan 14 16:30:07 2016 +0530
+++ b/source/encoder/sao.cpp Mon Jan 18 21:19:56 2016 +0530
@@ -1516,47 +1516,6 @@
}
}
-/** rate distortion optimization of SAO unit */
-inline int64_t SAO::estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t* currentDistortionTableBo, double* currentRdCostTableBo)
-{
- int64_t estDist = 0;
-
- for (int classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? SAO_EO_LEN + 1 : SAO_NUM_BO_CLASSES + 1); classIdx++)
- {
- int32_t count = m_count[plane][typeIdx][classIdx];
- int32_t& offsetOrg = m_offsetOrg[plane][typeIdx][classIdx];
- int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];
-
- if (typeIdx == SAO_BO)
- {
- currentDistortionTableBo[classIdx - 1] = 0;
- currentRdCostTableBo[classIdx - 1] = lambda;
- }
- if (count)
- {
- int offset = roundIBDI(offsetOrg << (X265_DEPTH - 8), count);
- offset = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, offset);
- if (typeIdx < SAO_BO)
- {
- if (classIdx < 3)
- offset = X265_MAX(offset, 0);
- else
- offset = X265_MIN(offset, 0);
- }
- offsetOut = estIterOffset(typeIdx, classIdx, lambda, offset, count, offsetOrg, currentDistortionTableBo, currentRdCostTableBo);
- }
- else
- {
- offsetOrg = 0;
- offsetOut = 0;
- }
- if (typeIdx != SAO_BO)
- estDist += estSaoDist(count, (int)offsetOut << SAO_BIT_INC, offsetOrg);
- }
-
- return estDist;
-}
-
inline int SAO::estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg, int32_t* currentDistortionTableBo, double* currentRdCostTableBo)
{
int offsetOut = 0;
@@ -1606,38 +1565,42 @@
m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
- for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
+ //EO distortion calculation
+ for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
{
- int64_t estDist = estSaoTypeDist(0, typeIdx, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
+ int64_t estDist = 0;
+ for (int classIdx = 1; classIdx < SAO_EO_LEN + 1; classIdx++)
+ {
+ int32_t count = m_count[0][typeIdx][classIdx];
+ int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx];
+ int32_t& offsetOut = m_offset[0][typeIdx][classIdx];
- if (typeIdx == SAO_BO)
- {
- // Estimate Best Position
- for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
+ if (count)
{
- double currentRDCost = 0.0;
- for (int j = i; j < i + SAO_BO_LEN; j++)
- currentRDCost += currentRdCostTableBo[j];
+ int offset = roundIBDI(offsetOrg << (X265_DEPTH - 8), count);
+ offset = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, offset);
- if (currentRDCost < bestRDCostTableBo)
- {
- bestRDCostTableBo = currentRDCost;
- bestClassTableBo = i;
- }
+ if (classIdx < 3)
+ offset = X265_MAX(offset, 0);
+ else
+ offset = X265_MIN(offset, 0);
+
+ offsetOut = estIterOffset(typeIdx, classIdx, m_lumaLambda, offset, count, offsetOrg, currentDistortionTableBo, currentRdCostTableBo);
}
+ else
+ {
+ offsetOrg = 0;
+ offsetOut = 0;
+ }
+ estDist += estSaoDist(count, (int)offsetOut << SAO_BIT_INC, offsetOrg);
+ }
- // Re code all Offsets
- // Code Center
- estDist = 0;
- for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
- estDist += currentDistortionTableBo[classIdx];
- }
SaoCtuParam ctuParamRdo;
ctuParamRdo.mergeMode = SAO_MERGE_NONE;
- ctuParamRdo.typeIdx = typeIdx;
- ctuParamRdo.bandPos = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
+ ctuParamRdo.typeIdx = typeIdx;
+ ctuParamRdo.bandPos = 0;
for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
- ctuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + ctuParamRdo.bandPos + 1];
+ ctuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + 1];
m_entropyCoder.load(m_rdContexts.temp);
m_entropyCoder.resetBits();
@@ -1654,6 +1617,70 @@
}
}
+ //BO distortion calculation
+ int64_t estDist = 0;
+ for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; classIdx++)
+ {
+ int32_t count = m_count[0][SAO_BO][classIdx];
+ int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx];
+ int32_t& offsetOut = m_offset[0][SAO_BO][classIdx];
+
+ currentDistortionTableBo[classIdx - 1] = 0;
+ currentRdCostTableBo[classIdx - 1] = m_lumaLambda;
+
+ if (count)
+ {
+ int offset = roundIBDI(offsetOrg << (X265_DEPTH - 8), count);
+ offset = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, offset);
+
+ offsetOut = estIterOffset(SAO_BO, classIdx, m_lumaLambda, offset, count, offsetOrg, currentDistortionTableBo, currentRdCostTableBo);
+ }
+ else
+ {
+ offsetOrg = 0;
+ offsetOut = 0;
+ }
+ }
+
+ // Estimate Best Position
+ for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
+ {
+ double currentRDCost = 0.0;
+ for (int j = i; j < i + SAO_BO_LEN; j++)
+ currentRDCost += currentRdCostTableBo[j];
+
+ if (currentRDCost < bestRDCostTableBo)
+ {
+ bestRDCostTableBo = currentRDCost;
+ bestClassTableBo = i;
+ }
+ }
+
+ estDist = 0;
+ for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
+ estDist += currentDistortionTableBo[classIdx];
+
+ SaoCtuParam ctuParamRdo;
+ ctuParamRdo.mergeMode = SAO_MERGE_NONE;
+ ctuParamRdo.typeIdx = SAO_BO;
+ ctuParamRdo.bandPos = bestClassTableBo;
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
+ ctuParamRdo.offset[classIdx] = (int)m_offset[0][SAO_BO][classIdx + ctuParamRdo.bandPos + 1];
+
+ m_entropyCoder.load(m_rdContexts.temp);
+ m_entropyCoder.resetBits();
+ m_entropyCoder.codeSaoOffset(ctuParamRdo, 0);
+
+ uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
+ double cost = (double)estDist + m_lumaLambda * (double)estRate;
+
+ if (cost < dCostPartBest)
+ {
+ dCostPartBest = cost;
+ copySaoUnit(lclCtuParam, &ctuParamRdo);
+ bestDist = estDist;
+ }
+
mergeDist[0] = ((double)bestDist / m_lumaLambda);
m_entropyCoder.load(m_rdContexts.temp);
m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
@@ -1677,41 +1704,39 @@
double costPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_chromaLambda;
- for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
+ //EO distortion calculation
+ for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
{
- int64_t estDist[2];
- if (typeIdx == SAO_BO)
+ int64_t estDist[2] = {0, 0};
+ for (int compIdx = 1; compIdx < 3; compIdx++)
{
- // Estimate Best Position
- for (int compIdx = 0; compIdx < 2; compIdx++)
+ for (int classIdx = 1; classIdx < SAO_EO_LEN + 1; classIdx++)
{
- double bestRDCostTableBo = MAX_DOUBLE;
- estDist[compIdx] = estSaoTypeDist(compIdx + 1, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
- for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
+ int32_t count = m_count[compIdx][typeIdx][classIdx];
+ int32_t& offsetOrg = m_offsetOrg[compIdx][typeIdx][classIdx];
+ int32_t& offsetOut = m_offset[compIdx][typeIdx][classIdx];
+
+ if (count)
{
- double currentRDCost = 0.0;
- for (int j = i; j < i + SAO_BO_LEN; j++)
- currentRDCost += currentRdCostTableBo[j];
+ int offset = roundIBDI(offsetOrg << (X265_DEPTH - 8), count);
+ offset = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, offset);
- if (currentRDCost < bestRDCostTableBo)
- {
- bestRDCostTableBo = currentRDCost;
- bestClassTableBo[compIdx] = i;
- }
+ if (classIdx < 3)
+ offset = X265_MAX(offset, 0);
+ else
+ offset = X265_MIN(offset, 0);
+
+ offsetOut = estIterOffset(typeIdx, classIdx, m_chromaLambda, offset, count, offsetOrg, currentDistortionTableBo, currentRdCostTableBo);
}
-
- // Re code all Offsets
- // Code Center
- estDist[compIdx] = 0;
- for (int classIdx = bestClassTableBo[compIdx]; classIdx < bestClassTableBo[compIdx] + SAO_BO_LEN; classIdx++)
- estDist[compIdx] += currentDistortionTableBo[classIdx];
+ else
+ {
+ offsetOrg = 0;
+ offsetOut = 0;
+ }
+ if (typeIdx != SAO_BO)
+ estDist[compIdx - 1] += estSaoDist(count, (int)offsetOut << SAO_BIT_INC, offsetOrg);
}
}
- else
- {
- estDist[0] = estSaoTypeDist(1, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
- estDist[1] = estSaoTypeDist(2, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
- }
m_entropyCoder.load(m_rdContexts.temp);
m_entropyCoder.resetBits();
@@ -1721,9 +1746,9 @@
{
ctuParamRdo[compIdx].mergeMode = SAO_MERGE_NONE;
ctuParamRdo[compIdx].typeIdx = typeIdx;
- ctuParamRdo[compIdx].bandPos = (typeIdx == SAO_BO) ? bestClassTableBo[compIdx] : 0;
+ ctuParamRdo[compIdx].bandPos = 0;
for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
- ctuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][typeIdx][classIdx + ctuParamRdo[compIdx].bandPos + 1];
+ ctuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][typeIdx][classIdx + 1];
m_entropyCoder.codeSaoOffset(ctuParamRdo[compIdx], compIdx + 1);
}
@@ -1740,6 +1765,81 @@
}
}
+ // BO distortion calculation
+ int64_t estDist[2];
+
+ // Estimate Best Position
+ for (int compIdx = 1; compIdx < 3; compIdx++)
+ {
+ double bestRDCostTableBo = MAX_DOUBLE;
+
+ for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; classIdx++)
+ {
+ int32_t count = m_count[compIdx][SAO_BO][classIdx];
+ int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx];
+ int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx];
+
+ currentDistortionTableBo[classIdx - 1] = 0;
+ currentRdCostTableBo[classIdx - 1] = m_chromaLambda;
+
+ if (count)
+ {
+ int offset = roundIBDI(offsetOrg << (X265_DEPTH - 8), count);
+ offset = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, offset);
+
+ offsetOut = estIterOffset(SAO_BO, classIdx, m_chromaLambda, offset, count, offsetOrg, currentDistortionTableBo, currentRdCostTableBo);
+ }
+ else
+ {
+ offsetOrg = 0;
+ offsetOut = 0;
+ }
+ }
+
+ for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
+ {
+ double currentRDCost = 0.0;
+ for (int j = i; j < i + SAO_BO_LEN; j++)
+ currentRDCost += currentRdCostTableBo[j];
+
+ if (currentRDCost < bestRDCostTableBo)
+ {
+ bestRDCostTableBo = currentRDCost;
+ bestClassTableBo[compIdx - 1] = i;
+ }
+ }
+
+ estDist[compIdx - 1] = 0;
+ for (int classIdx = bestClassTableBo[compIdx - 1]; classIdx < bestClassTableBo[compIdx - 1] + SAO_BO_LEN; classIdx++)
+ estDist[compIdx - 1] += currentDistortionTableBo[classIdx];
+ }
+
+ m_entropyCoder.load(m_rdContexts.temp);
+ m_entropyCoder.resetBits();
+
+ SaoCtuParam ctuParamRdo[2];
+ for (int compIdx = 0; compIdx < 2; compIdx++)
+ {
+ ctuParamRdo[compIdx].mergeMode = SAO_MERGE_NONE;
+ ctuParamRdo[compIdx].typeIdx = SAO_BO;
+ ctuParamRdo[compIdx].bandPos = bestClassTableBo[compIdx];
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
+ ctuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][SAO_BO][classIdx + ctuParamRdo[compIdx].bandPos + 1];
+
+ m_entropyCoder.codeSaoOffset(ctuParamRdo[compIdx], compIdx + 1);
+ }
+
+ uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
+ double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate;
+
+ if (cost < costPartBest)
+ {
+ costPartBest = cost;
+ copySaoUnit(lclCtuParam[0], &ctuParamRdo[0]);
+ copySaoUnit(lclCtuParam[1], &ctuParamRdo[1]);
+ bestDist = (estDist[0] + estDist[1]);
+ }
+
mergeDist[0] += ((double)bestDist / m_chromaLambda);
m_entropyCoder.load(m_rdContexts.temp);
m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1);
diff -r 6b430aa3ef00 -r 5ba1062898a0 source/encoder/sao.h
--- a/source/encoder/sao.h Thu Jan 14 16:30:07 2016 +0530
+++ b/source/encoder/sao.h Mon Jan 18 21:19:56 2016 +0530
@@ -146,8 +146,6 @@
inline int estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg,
int32_t* currentDistortionTableBo, double* currentRdCostTableBo);
- inline int64_t estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t* currentDistortionTableBo, double* currentRdCostTableBo);
-
void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int addr);
More information about the x265-devel mailing list