<div dir="ltr">Is there an impact of this on efficiency/performance? Do you have some results that you can share?</div><div class="gmail_extra"><br clear="all"><div><div class="gmail_signature"><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><div dir="ltr"><div dir="ltr"><div dir="ltr"><div dir="ltr">Pradeep Ramachandran, PhD<div>Solution Architect at <a href="http://www.multicorewareinc.com/" style="font-size:12.8px" target="_blank">www.multicorewareinc.com/</a></div><div>Visiting Professor at <a href="http://www.cse.iitm.ac.in" target="_blank">www.cse.iitm.ac.in/</a></div><div><a href="http://pradeeprama.info/" style="font-size:12.8px" target="_blank">pradeeprama.info/</a><br></div><div><span style="font-size:12.8px">Ph: +91 99627 82018</span><br></div></div></div></div></div></div></div></div></div></div></div></div>
<br><div class="gmail_quote">On Wed, May 4, 2016 at 7:09 AM, <span dir="ltr"><<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Ashok Kumar Mishra<<a href="mailto:ashok@multicorewareinc.com">ashok@multicorewareinc.com</a>><br>
# Date 1462355258 -19800<br>
# Wed May 04 15:17:38 2016 +0530<br>
# Node ID 70a0888d0703a35b0c3c3a57f96931d0767eb470<br>
# Parent 9f27620a948b67498056246b97db72bebac99218<br>
[OUTPUT CHANGED]SAO: convert sao rdo cost calculation from float to int<br>
<br>
diff -r 9f27620a948b -r 70a0888d0703 source/encoder/sao.cpp<br>
--- a/source/encoder/sao.cpp Mon Apr 25 13:39:54 2016 +0530<br>
+++ b/source/encoder/sao.cpp Wed May 04 15:17:38 2016 +0530<br>
@@ -53,7 +53,7 @@<br>
return r;<br>
}<br>
<br>
-inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg)<br>
+inline int64_t estSaoDist(int32_t count, int32_t offset, int32_t offsetOrg)<br>
{<br>
return (count * offset - offsetOrg * 2) * offset;<br>
}<br>
@@ -1193,7 +1193,7 @@<br>
const CUData* cu = m_frame->m_encData->getPicCTU(addr);<br>
int qp = cu->m_qp[0];<br>
<br>
- double lambda[2] = {0.0};<br>
+ int64_t lambda[2] = { 0 };<br>
<br>
int qpCb = qp;<br>
if (m_param->internalCsp == X265_CSP_I420)<br>
@@ -1201,8 +1201,8 @@<br>
else<br>
qpCb = X265_MIN(qp + slice->m_pps->chromaQpOffset[0], QP_MAX_SPEC);<br>
<br>
- lambda[0] = x265_lambda2_tab[qp];<br>
- lambda[1] = x265_lambda2_tab[qpCb]; // Use Cb QP for SAO chroma<br>
+ lambda[0] = (int64_t)floor(256.0 * x265_lambda2_tab[qp]);<br>
+ lambda[1] = (int64_t)floor(256.0 * x265_lambda2_tab[qpCb]); // Use Cb QP for SAO chroma<br>
<br>
const bool allowMerge[2] = {(idxX != 0), (rowBaseAddr != 0)}; // left, up<br>
<br>
@@ -1250,8 +1250,8 @@<br>
m_entropyCoder.store(m_rdContexts.temp);<br>
<br>
// Estimate distortion and cost of new SAO params<br>
- double bestCost = 0.0;<br>
- double rateDist = 0.0;<br>
+ int64_t bestCost = 0;<br>
+ int64_t rateDist = 0;<br>
// Estimate distortion and cost of new SAO params<br>
saoLumaComponentParamDist(saoParam, addr, rateDist, lambda, bestCost);<br>
if (chroma)<br>
@@ -1265,7 +1265,7 @@<br>
if (!allowMerge[mergeIdx])<br>
continue;<br>
<br>
- double mergeDist = 0;<br>
+ int64_t mergeDist = 0;<br>
for (int plane = 0; plane < planes; plane++)<br>
{<br>
int64_t estDist = 0;<br>
@@ -1280,7 +1280,7 @@<br>
estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos]);<br>
}<br>
}<br>
- mergeDist += ((double)estDist / lambda[!!plane]);<br>
+ mergeDist += (estDist / (lambda[!!plane] >> 8));<br>
}<br>
<br>
m_entropyCoder.load(m_rdContexts.cur);<br>
@@ -1290,8 +1290,8 @@<br>
if (allowMerge[1] && (mergeIdx == 1))<br>
m_entropyCoder.codeSaoMerge(1);<br>
<br>
- int32_t estRate = m_entropyCoder.getNumberOfWrittenBits();<br>
- double mergeCost = mergeDist + (double)estRate;<br>
+ uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();<br>
+ int64_t mergeCost = mergeDist + estRate;<br>
if (mergeCost < bestCost)<br>
{<br>
SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;<br>
@@ -1337,7 +1337,7 @@<br>
{<br>
for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++)<br>
{<br>
- int32_t count = m_count[plane][typeIdx][classIdx];<br>
+ int32_t& count = m_count[plane][typeIdx][classIdx];<br>
int32_t& offsetOrg = m_offsetOrg[plane][typeIdx][classIdx];<br>
int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];<br>
<br>
@@ -1360,7 +1360,7 @@<br>
{<br>
for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)<br>
{<br>
- int32_t count = m_count[plane][SAO_BO][classIdx];<br>
+ int32_t& count = m_count[plane][SAO_BO][classIdx];<br>
int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx];<br>
int32_t& offsetOut = m_offset[plane][SAO_BO][classIdx];<br>
<br>
@@ -1373,14 +1373,27 @@<br>
}<br>
}<br>
<br>
-void SAO::estIterOffset(int typeIdx, double lambda, int32_t count, int32_t offsetOrg, int& offset, int& distClasses, double& costClasses)<br>
+inline int64_t SAO::calcSaoRdoCost(int64_t distortion, uint32_t bits, int64_t lambda)<br>
+{<br>
+#if X265_DEPTH < 10<br>
+ X265_CHECK(bits <= (INT64_MAX - 128) / lambda,<br>
+ "calcRdCost wrap detected dist: %u, bits %u, lambda: " X265_LL "\n",<br>
+ distortion, bits, lambda);<br>
+#else<br>
+ X265_CHECK(bits <= (INT64_MAX - 128) / lambda2,<br>
+ "calcRdCost wrap detected dist: " X265_LL ", bits %u, lambda: " X265_LL "\n",<br>
+ distortion, bits, lambda);<br>
+#endif<br>
+ return distortion + ((bits * lambda + 128) >> 8);<br>
+}<br>
+void SAO::estIterOffset(int typeIdx, int64_t lambda, int32_t count, int32_t offsetOrg, int32_t& offset, int32_t& distClasses, int64_t& costClasses)<br>
{<br>
int bestOffset = 0;<br>
distClasses = 0;<br>
<br>
// Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit.<br>
// entropy coder can be used to measure the exact rate here.<br>
- double bestCost = lambda;<br>
+ int64_t bestCost = calcSaoRdoCost(0, 1, lambda);<br>
while (offset != 0)<br>
{<br>
// Calculate the bits required for signalling the offset<br>
@@ -1390,7 +1403,7 @@<br>
<br>
// Do the dequntization before distorion calculation<br>
int64_t dist = estSaoDist(count, offset << SAO_BIT_INC, offsetOrg);<br>
- double cost = ((double)dist + lambda * (double)rate);<br>
+ int64_t cost = calcSaoRdoCost(dist, rate, lambda);<br>
if (cost < bestCost)<br>
{<br>
bestCost = cost;<br>
@@ -1404,22 +1417,23 @@<br>
offset = bestOffset;<br>
}<br>
<br>
-void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr, double& rateDist, double* lambda, double &bestCost)<br>
+void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int32_t addr, int64_t& rateDist, int64_t* lambda, int64_t &bestCost)<br>
{<br>
int64_t bestDist = 0;<br>
int bestTypeIdx = -1;<br>
<br>
SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];<br>
<br>
- int distClasses[MAX_NUM_SAO_CLASS];<br>
- double costClasses[MAX_NUM_SAO_CLASS];<br>
+ int32_t distClasses[MAX_NUM_SAO_CLASS];<br>
+ int64_t costClasses[MAX_NUM_SAO_CLASS];<br>
<br>
// RDO SAO_NA<br>
m_entropyCoder.load(m_rdContexts.temp);<br>
m_entropyCoder.resetBits();<br>
m_entropyCoder.codeSaoType(0);<br>
<br>
- double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * lambda[0];<br>
+ uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();<br>
+ int64_t costPartBest = calcSaoRdoCost(0, rate, lambda[0]);<br>
<br>
//EO distortion calculation<br>
for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)<br>
@@ -1427,7 +1441,7 @@<br>
int64_t estDist = 0;<br>
for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++)<br>
{<br>
- int32_t count = m_count[0][typeIdx][classIdx];<br>
+ int32_t& count = m_count[0][typeIdx][classIdx];<br>
int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx];<br>
int32_t& offsetOut = m_offset[0][typeIdx][classIdx];<br>
<br>
@@ -1441,12 +1455,12 @@<br>
m_entropyCoder.resetBits();<br>
m_entropyCoder.codeSaoOffsetEO(m_offset[0][typeIdx] + 1, typeIdx, 0);<br>
<br>
- uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();<br>
- double cost = (double)estDist + lambda[0] * (double)estRate;<br>
+ uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();<br>
+ int64_t cost = calcSaoRdoCost(estDist, rate, lambda[0]);<br>
<br>
- if (cost < dCostPartBest)<br>
+ if (cost < costPartBest)<br>
{<br>
- dCostPartBest = cost;<br>
+ costPartBest = cost;<br>
bestDist = estDist;<br>
bestTypeIdx = typeIdx;<br>
}<br>
@@ -1458,14 +1472,14 @@<br>
lclCtuParam->typeIdx = bestTypeIdx;<br>
lclCtuParam->bandPos = 0;<br>
for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)<br>
- lclCtuParam->offset[classIdx] = (int)m_offset[0][bestTypeIdx][classIdx + 1];<br>
+ lclCtuParam->offset[classIdx] = m_offset[0][bestTypeIdx][classIdx + 1];<br>
}<br>
<br>
//BO RDO<br>
int64_t estDist = 0;<br>
for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)<br>
{<br>
- int32_t count = m_count[0][SAO_BO][classIdx];<br>
+ int32_t& count = m_count[0][SAO_BO][classIdx];<br>
int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx];<br>
int32_t& offsetOut = m_offset[0][SAO_BO][classIdx];<br>
<br>
@@ -1473,12 +1487,12 @@<br>
}<br>
<br>
// Estimate Best Position<br>
- double bestRDCostBO = MAX_DOUBLE;<br>
- int bestClassBO = 0;<br>
+ int64_t bestRDCostBO = MAX_INT64;<br>
+ int32_t bestClassBO = 0;<br>
<br>
for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++)<br>
{<br>
- double currentRDCost = 0.0;<br>
+ int64_t currentRDCost = 0;<br>
for (int j = i; j < i + SAO_NUM_OFFSET; j++)<br>
currentRDCost += costClasses[j];<br>
<br>
@@ -1498,21 +1512,21 @@<br>
m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + bestClassBO, bestClassBO, 0);<br>
<br>
uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();<br>
- double cost = (double)estDist + lambda[0] * (double)estRate;<br>
+ int64_t cost = calcSaoRdoCost(estDist, estRate, lambda[0]);<br>
<br>
- if (cost < dCostPartBest)<br>
+ if (cost < costPartBest)<br>
{<br>
- dCostPartBest = cost;<br>
+ costPartBest = cost;<br>
bestDist = estDist;<br>
<br>
lclCtuParam->mergeMode = SAO_MERGE_NONE;<br>
lclCtuParam->typeIdx = SAO_BO;<br>
lclCtuParam->bandPos = bestClassBO;<br>
for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)<br>
- lclCtuParam->offset[classIdx] = (int)m_offset[0][SAO_BO][classIdx + bestClassBO];<br>
+ lclCtuParam->offset[classIdx] = m_offset[0][SAO_BO][classIdx + bestClassBO];<br>
}<br>
<br>
- rateDist = ((double)bestDist / lambda[0]);<br>
+ rateDist = bestDist / (lambda[0] >> 8);<br>
m_entropyCoder.load(m_rdContexts.temp);<br>
m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);<br>
m_entropyCoder.store(m_rdContexts.temp);<br>
@@ -1520,26 +1534,27 @@<br>
if (m_param->internalCsp == X265_CSP_I400)<br>
{<br>
uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();<br>
- bestCost = rateDist + (double)rate;<br>
+ bestCost = rateDist + rate;<br>
}<br>
}<br>
<br>
-void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int addr, double& rateDist, double* lambda, double &bestCost)<br>
+void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int32_t addr, int64_t& rateDist, int64_t* lambda, int64_t &bestCost)<br>
{<br>
int64_t bestDist = 0;<br>
int bestTypeIdx = -1;<br>
<br>
SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr], &saoParam->ctuParam[2][addr] };<br>
<br>
- double costClasses[MAX_NUM_SAO_CLASS];<br>
- int distClasses[MAX_NUM_SAO_CLASS];<br>
- int bestClassBO[2] = { 0, 0 };<br>
+ int64_t costClasses[MAX_NUM_SAO_CLASS];<br>
+ int32_t distClasses[MAX_NUM_SAO_CLASS];<br>
+ int32_t bestClassBO[2] = { 0, 0 };<br>
<br>
m_entropyCoder.load(m_rdContexts.temp);<br>
m_entropyCoder.resetBits();<br>
m_entropyCoder.codeSaoType(0);<br>
<br>
- double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * lambda[1];<br>
+ uint32_t bits = m_entropyCoder.getNumberOfWrittenBits();<br>
+ int64_t costPartBest = calcSaoRdoCost(0, bits, lambda[1]);<br>
<br>
//EO RDO<br>
for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)<br>
@@ -1549,7 +1564,7 @@<br>
{<br>
for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++)<br>
{<br>
- int32_t count = m_count[compIdx][typeIdx][classIdx];<br>
+ int32_t& count = m_count[compIdx][typeIdx][classIdx];<br>
int32_t& offsetOrg = m_offsetOrg[compIdx][typeIdx][classIdx];<br>
int32_t& offsetOut = m_offset[compIdx][typeIdx][classIdx];<br>
<br>
@@ -1566,11 +1581,11 @@<br>
m_entropyCoder.codeSaoOffsetEO(m_offset[compIdx + 1][typeIdx] + 1, typeIdx, compIdx + 1);<br>
<br>
uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();<br>
- double cost = (double)(estDist[0] + estDist[1]) + lambda[1] * (double)estRate;<br>
+ int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]), estRate, lambda[1]);<br>
<br>
- if (cost < dCostPartBest)<br>
+ if (cost < costPartBest)<br>
{<br>
- dCostPartBest = cost;<br>
+ costPartBest = cost;<br>
bestDist = (estDist[0] + estDist[1]);<br>
bestTypeIdx = typeIdx;<br>
}<br>
@@ -1584,7 +1599,7 @@<br>
lclCtuParam[compIdx]->typeIdx = bestTypeIdx;<br>
lclCtuParam[compIdx]->bandPos = 0;<br>
for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)<br>
- lclCtuParam[compIdx]->offset[classIdx] = (int)m_offset[compIdx + 1][bestTypeIdx][classIdx + 1];<br>
+ lclCtuParam[compIdx]->offset[classIdx] = m_offset[compIdx + 1][bestTypeIdx][classIdx + 1];<br>
}<br>
}<br>
<br>
@@ -1594,11 +1609,11 @@<br>
// Estimate Best Position<br>
for (int compIdx = 1; compIdx < 3; compIdx++)<br>
{<br>
- double bestRDCostBO = MAX_DOUBLE;<br>
+ int64_t bestRDCostBO = MAX_INT64;<br>
<br>
for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)<br>
{<br>
- int32_t count = m_count[compIdx][SAO_BO][classIdx];<br>
+ int32_t& count = m_count[compIdx][SAO_BO][classIdx];<br>
int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx];<br>
int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx];<br>
<br>
@@ -1607,7 +1622,7 @@<br>
<br>
for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++)<br>
{<br>
- double currentRDCost = 0.0;<br>
+ int64_t currentRDCost = 0;<br>
for (int j = i; j < i + SAO_NUM_OFFSET; j++)<br>
currentRDCost += costClasses[j];<br>
<br>
@@ -1630,11 +1645,11 @@<br>
m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] + bestClassBO[compIdx], bestClassBO[compIdx], compIdx + 1);<br>
<br>
uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();<br>
- double cost = (double)(estDist[0] + estDist[1]) + lambda[1] * (double)estRate;<br>
+ int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]), estRate, lambda[1]);<br>
<br>
- if (cost < dCostPartBest)<br>
+ if (cost < costPartBest)<br>
{<br>
- dCostPartBest = cost;<br>
+ costPartBest = cost;<br>
bestDist = (estDist[0] + estDist[1]);<br>
<br>
for (int compIdx = 0; compIdx < 2; compIdx++)<br>
@@ -1643,11 +1658,11 @@<br>
lclCtuParam[compIdx]->typeIdx = SAO_BO;<br>
lclCtuParam[compIdx]->bandPos = bestClassBO[compIdx];<br>
for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)<br>
- lclCtuParam[compIdx]->offset[classIdx] = (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx]];<br>
+ lclCtuParam[compIdx]->offset[classIdx] = m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx]];<br>
}<br>
}<br>
<br>
- rateDist += ((double)bestDist / lambda[1]);<br>
+ rateDist += (bestDist / (lambda[1] >> 8));<br>
m_entropyCoder.load(m_rdContexts.temp);<br>
<br>
if (saoParam->bSaoFlag[1])<br>
@@ -1657,12 +1672,12 @@<br>
m_entropyCoder.store(m_rdContexts.temp);<br>
<br>
uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();<br>
- bestCost = rateDist + (double)rate;<br>
+ bestCost = rateDist + rate;<br>
}<br>
else<br>
{<br>
uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();<br>
- bestCost = rateDist + (double)rate;<br>
+ bestCost = rateDist + rate;<br>
}<br>
}<br>
<br>
diff -r 9f27620a948b -r 70a0888d0703 source/encoder/sao.h<br>
--- a/source/encoder/sao.h Mon Apr 25 13:39:54 2016 +0530<br>
+++ b/source/encoder/sao.h Wed May 04 15:17:38 2016 +0530<br>
@@ -126,12 +126,13 @@<br>
void calcSaoStatsCu(int addr, int plane);<br>
void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);<br>
<br>
- void saoLumaComponentParamDist(SAOParam* saoParam, int addr, double& rateDist, double* lambda, double &bestCost);<br>
- void saoChromaComponentParamDist(SAOParam* saoParam, int addr, double& rateDist, double* lambda, double &bestCost);<br>
+ void saoLumaComponentParamDist(SAOParam* saoParam, int addr, int64_t& rateDist, int64_t* lambda, int64_t& bestCost);<br>
+ void saoChromaComponentParamDist(SAOParam* saoParam, int addr, int64_t& rateDist, int64_t* lambda, int64_t& bestCost);<br>
<br>
- void estIterOffset(int typeIdx, double lambda, int32_t count, int32_t offsetOrg, int& offset, int& distClasses, double& costClasses);<br>
+ void estIterOffset(int typeIdx, int64_t lambda, int32_t count, int32_t offsetOrg, int32_t& offset, int32_t& distClasses, int64_t& costClasses);<br>
void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);<br>
void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int addr);<br>
+ int64_t calcSaoRdoCost(int64_t distortion, uint32_t bits, int64_t lambda);<br>
<br>
void saoStatsInitialOffset(int planes);<br>
<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div>