[x265] [PATCH 3 of 4] framepp: Parallelism of SAO (saoLcuBasedOptimization mode only)
Min Chen
chenm003 at 163.com
Thu Aug 22 09:18:32 CEST 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1377155849 -28800
# Node ID 08631e01e5c0ee7f05e81d2dff89bc59534cebed
# Parent 78f36991d73034630b123c6b98d00e6fffde468f
framepp: Parallelism of SAO (saoLcuBasedOptimization mode only)
diff -r 78f36991d730 -r 08631e01e5c0 source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp
--- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp Thu Aug 22 15:16:40 2013 +0800
+++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp Thu Aug 22 15:17:29 2013 +0800
@@ -73,8 +73,12 @@
m_upBuff2 = NULL;
m_upBufft = NULL;
- m_tmpU1 = NULL;
- m_tmpU2 = NULL;
+ m_tmpU1[0] = NULL; // m_tmpU1/m_tmpU2 hold one buffer per color component (indexed by yCbCr)
+ m_tmpU1[1] = NULL;
+ m_tmpU1[2] = NULL;
+ m_tmpU2[0] = NULL; // every slot must be cleared: destroy() calls delete [] on all three
+ m_tmpU2[1] = NULL;
+ m_tmpU2[2] = NULL;
m_tmpL1 = NULL;
m_tmpL2 = NULL;
}
@@ -251,8 +255,14 @@
m_tmpL1 = new Pel[m_maxCUHeight + 1];
m_tmpL2 = new Pel[m_maxCUHeight + 1];
- m_tmpU1 = new Pel[m_picWidth];
- m_tmpU2 = new Pel[m_picWidth];
+
+ m_tmpU1[0] = new Pel[m_picWidth];
+ m_tmpU1[1] = new Pel[m_picWidth];
+ m_tmpU1[2] = new Pel[m_picWidth];
+
+ m_tmpU2[0] = new Pel[m_picWidth];
+ m_tmpU2[1] = new Pel[m_picWidth];
+ m_tmpU2[2] = new Pel[m_picWidth];
}
/** destroy SampleAdaptiveOffset memory.
@@ -320,15 +330,23 @@
delete [] m_tmpL2;
m_tmpL2 = NULL;
}
- if (m_tmpU1)
+ if (m_tmpU1[0])
{
- delete [] m_tmpU1;
- m_tmpU1 = NULL;
+ delete [] m_tmpU1[0];
+ delete [] m_tmpU1[1];
+ delete [] m_tmpU1[2];
+ m_tmpU1[0] = NULL;
+ m_tmpU1[1] = NULL;
+ m_tmpU1[2] = NULL;
}
- if (m_tmpU2)
+ if (m_tmpU2[0])
{
- delete [] m_tmpU2;
- m_tmpU2 = NULL;
+ delete [] m_tmpU2[0];
+ delete [] m_tmpU2[1];
+ delete [] m_tmpU2[2];
+ m_tmpU2[0] = NULL;
+ m_tmpU2[1] = NULL;
+ m_tmpU2[2] = NULL;
}
}
@@ -540,20 +558,11 @@
Void TComSampleAdaptiveOffset::destroyPicSaoInfo()
{
}
-
-/** sample adaptive offset process for one LCU
+/** sample adaptive offset process for one LCU crossing LCU boundary
* \param addr, iSaoType, yCbCr
*/
Void TComSampleAdaptiveOffset::processSaoCu(Int addr, Int saoType, Int yCbCr)
{
- processSaoCuOrg(addr, saoType, yCbCr);
-}
-
-/** sample adaptive offset process for one LCU crossing LCU boundary
- * \param addr, iSaoType, yCbCr
- */
-Void TComSampleAdaptiveOffset::processSaoCuOrg(Int addr, Int saoType, Int yCbCr)
-{
Int x, y;
TComDataCU *tmpCu = m_pic->getCU(addr);
Pel* rec;
@@ -632,7 +641,7 @@
rec -= (stride * (cuHeightTmp + 1));
tmpL = m_tmpL1;
- tmpU = &(m_tmpU1[lpelx]);
+ tmpU = &(m_tmpU1[yCbCr][lpelx]);
}
clipTbl = (yCbCr == 0) ? m_clipTable : m_chromaClipTable;
@@ -867,7 +876,7 @@
picWidthTmp = m_picWidth >> 1;
}
- memcpy(m_tmpU1, rec, sizeof(Pel) * picWidthTmp);
+ memcpy(m_tmpU1[yCbCr], rec, sizeof(Pel) * picWidthTmp);
Int i;
UInt edgeType;
@@ -922,7 +931,7 @@
rec -= (stride << 1);
- memcpy(m_tmpU2, rec, sizeof(Pel) * picWidthTmp);
+ memcpy(m_tmpU2[yCbCr], rec, sizeof(Pel) * picWidthTmp);
for (idxX = 0; idxX < frameWidthInCU; idxX++)
{
@@ -1006,15 +1015,174 @@
}
}
- tmpUSwap = m_tmpU1;
- m_tmpU1 = m_tmpU2;
- m_tmpU2 = tmpUSwap;
+ tmpUSwap = m_tmpU1[yCbCr];
+ m_tmpU1[yCbCr] = m_tmpU2[yCbCr];
+ m_tmpU2[yCbCr] = tmpUSwap;
}
}
-/** Reset SAO LCU part
- * \param saoLcuParam
+/** Apply SAO to every LCU of one LCU row (row-wise counterpart of the whole-picture loop)
+ * \param saoLcuParam SAO LCU parameters of this component, indexed by LCU address
+ * \param idxY index of the LCU row to process
+ * \param yCbCr color component index (0: luma, 1: Cb, otherwise Cr)
*/
+Void TComSampleAdaptiveOffset::processSaoUnitRow(SaoLcuParam* saoLcuParam, int idxY, Int yCbCr)
+{
+ Pel *rec;
+ Int picWidthTmp;
+
+ if (yCbCr == 0)
+ {
+ rec = m_pic->getPicYuvRec()->getLumaAddr();
+ picWidthTmp = m_picWidth;
+ }
+ else if (yCbCr == 1)
+ {
+ rec = m_pic->getPicYuvRec()->getCbAddr();
+ picWidthTmp = m_picWidth >> 1;
+ }
+ else
+ {
+ rec = m_pic->getPicYuvRec()->getCrAddr();
+ picWidthTmp = m_picWidth >> 1;
+ }
+
+ if (idxY == 0) // first row only: prime m_tmpU1 from the component's base address
+ memcpy(m_tmpU1[yCbCr], rec, sizeof(Pel) * picWidthTmp);
+
+ Int i;
+ UInt edgeType;
+ Pel* lumaTable = NULL;
+ Pel* clipTable = NULL;
+ Int* offsetBo = NULL;
+ Int typeIdx;
+
+ Int offset[LUMA_GROUP_NUM + 1];
+ Int idxX;
+ Int addr;
+ Int frameWidthInCU = m_pic->getFrameWidthInCU();
+ Int stride;
+ Pel *tmpUSwap;
+ Int sChroma = (yCbCr == 0) ? 0 : 1; // chroma LCU dimensions are halved via >> sChroma
+ Bool mergeLeftFlag;
+ Int saoBitIncrease = (yCbCr == 0) ? m_saoBitIncreaseY : m_saoBitIncreaseC;
+
+ offsetBo = (yCbCr == 0) ? m_offsetBo : m_chromaOffsetBo;
+
+ offset[0] = 0;
+ {
+ addr = idxY * frameWidthInCU; // address of the first LCU in this row
+ if (yCbCr == 0)
+ {
+ rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
+ stride = m_pic->getStride();
+ picWidthTmp = m_picWidth;
+ }
+ else if (yCbCr == 1)
+ {
+ rec = m_pic->getPicYuvRec()->getCbAddr(addr);
+ stride = m_pic->getCStride();
+ picWidthTmp = m_picWidth >> 1;
+ }
+ else
+ {
+ rec = m_pic->getPicYuvRec()->getCrAddr(addr);
+ stride = m_pic->getCStride();
+ picWidthTmp = m_picWidth >> 1;
+ }
+
+ // pRec += stride*(m_uiMaxCUHeight-1);
+ for (i = 0; i < (m_maxCUHeight >> sChroma) + 1; i++) // save the first pixel of each line (LCU height + 1 lines) into m_tmpL1
+ {
+ m_tmpL1[i] = rec[0];
+ rec += stride;
+ }
+
+ rec -= (stride << 1); // two lines back from where the loop stopped, i.e. line (LCU height - 1) from the starting rec
+
+ memcpy(m_tmpU2[yCbCr], rec, sizeof(Pel) * picWidthTmp); // keep that line in m_tmpU2; it becomes m_tmpU1 after the swap below
+
+ for (idxX = 0; idxX < frameWidthInCU; idxX++)
+ {
+ addr = idxY * frameWidthInCU + idxX;
+
+ typeIdx = saoLcuParam[addr].typeIdx;
+ mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag;
+
+ if (typeIdx >= 0) // negative typeIdx: no processSaoCu() call for this LCU (see else branch)
+ {
+ if (!mergeLeftFlag) // when mergeLeftFlag is set, the tables built for the previous LCU are reused
+ {
+ if (typeIdx == SAO_BO)
+ {
+ for (i = 0; i < SAO_MAX_BO_CLASSES + 1; i++)
+ {
+ offset[i] = 0;
+ }
+
+ for (i = 0; i < saoLcuParam[addr].length; i++)
+ {
+ offset[(saoLcuParam[addr].subTypeIdx + i) % SAO_MAX_BO_CLASSES + 1] = saoLcuParam[addr].offset[i] << saoBitIncrease;
+ }
+
+ lumaTable = (yCbCr == 0) ? m_lumaTableBo : m_chromaTableBo;
+ clipTable = (yCbCr == 0) ? m_clipTable : m_chromaClipTable;
+
+ for (i = 0; i < (1 << X265_DEPTH); i++) // build the per-sample-value band-offset lookup table
+ {
+ offsetBo[i] = clipTable[i + offset[lumaTable[i]]];
+ }
+ }
+ if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
+ {
+ for (i = 0; i < saoLcuParam[addr].length; i++)
+ {
+ offset[i + 1] = saoLcuParam[addr].offset[i] << saoBitIncrease;
+ }
+
+ for (edgeType = 0; edgeType < 6; edgeType++) // remap offsets through m_eoTable into m_offsetEo
+ {
+ m_offsetEo[edgeType] = offset[m_eoTable[edgeType]];
+ }
+ }
+ }
+ processSaoCu(addr, typeIdx, yCbCr);
+ }
+ else
+ {
+ if (idxX != (frameWidthInCU - 1)) // not the last LCU of the row: save this LCU's last pixel column into m_tmpL1
+ {
+ if (yCbCr == 0)
+ {
+ rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
+ stride = m_pic->getStride();
+ }
+ else if (yCbCr == 1)
+ {
+ rec = m_pic->getPicYuvRec()->getCbAddr(addr);
+ stride = m_pic->getCStride();
+ }
+ else
+ {
+ rec = m_pic->getPicYuvRec()->getCrAddr(addr);
+ stride = m_pic->getCStride();
+ }
+ Int widthShift = m_maxCUWidth >> sChroma;
+ for (i = 0; i < (m_maxCUHeight >> sChroma) + 1; i++)
+ {
+ m_tmpL1[i] = rec[widthShift - 1];
+ rec += stride;
+ }
+ }
+ }
+ }
+
+ tmpUSwap = m_tmpU1[yCbCr]; // swap this component's two upper-line buffers for the next row
+ m_tmpU1[yCbCr] = m_tmpU2[yCbCr];
+ m_tmpU2[yCbCr] = tmpUSwap;
+ }
+}
+
Void TComSampleAdaptiveOffset::resetLcuPart(SaoLcuParam* saoLcuParam)
{
Int i, j;
@@ -1133,7 +1301,6 @@
}
static Void xPCMRestoration(TComPic* pic);
-static Void xPCMCURestoration(TComDataCU* cu, UInt absZOrderIdx, UInt depth);
static Void xPCMSampleRestoration(TComDataCU* cu, UInt absZOrderIdx, UInt depth, TextType ttText);
/** PCM LF disable process.
@@ -1172,7 +1339,7 @@
* \param depth CU depth
* \returns Void
*/
-static Void xPCMCURestoration(TComDataCU* cu, UInt absZOrderIdx, UInt depth)
+Void xPCMCURestoration(TComDataCU* cu, UInt absZOrderIdx, UInt depth)
{
TComPic* pic = cu->getPic();
UInt curNumParts = pic->getNumPartInCU() >> (depth << 1);
diff -r 78f36991d730 -r 08631e01e5c0 source/Lib/TLibCommon/TComSampleAdaptiveOffset.h
--- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.h Thu Aug 22 15:16:40 2013 +0800
+++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.h Thu Aug 22 15:17:29 2013 +0800
@@ -169,8 +169,8 @@
Int *m_upBufft;
TComPicYuv* m_tmpYuv; //!< temporary picture buffer pointer when non-across slice/tile boundary SAO is enabled
- Pel* m_tmpU1;
- Pel* m_tmpU2;
+ Pel* m_tmpU1[3];
+ Pel* m_tmpU2[3];
Pel* m_tmpL1;
Pel* m_tmpL2;
Int m_maxNumOffsetsPerPic;
@@ -193,10 +193,9 @@
static Void freeSaoParam(SAOParam* saoParam);
Void SAOProcess(SAOParam* saoParam);
- Void processSaoCu(Int addr, Int saoType, Int yCbCr);
Pel* getPicYuvAddr(TComPicYuv* picYuv, Int yCbCr, Int addr = 0);
- Void processSaoCuOrg(Int addr, Int partIdx, Int yCbCr); //!< LCU-basd SAO process without slice granularity
+ Void processSaoCu(Int addr, Int partIdx, Int yCbCr); //!< LCU-basd SAO process without slice granularity
Void createPicSaoInfo(TComPic* pic);
Void destroyPicSaoInfo();
@@ -204,6 +203,7 @@
Void convertQT2SaoUnit(SAOParam* saoParam, UInt partIdx, Int yCbCr);
Void convertOnePart2SaoUnit(SAOParam *saoParam, UInt partIdx, Int yCbCr);
Void processSaoUnitAll(SaoLcuParam* saoLcuParam, Bool oneUnitFlag, Int yCbCr);
+ Void processSaoUnitRow(SaoLcuParam* saoLcuParam, int idxY, Int yCbCr);
Void setSaoLcuBoundary(int bVal) { m_saoLcuBoundary = bVal != 0; }
Bool getSaoLcuBoundary() { return m_saoLcuBoundary; }
@@ -216,6 +216,8 @@
Void copySaoUnit(SaoLcuParam* saoUnitDst, SaoLcuParam* saoUnitSrc);
};
Void PCMLFDisableProcess(TComPic* pic);
+Void xPCMCURestoration(TComDataCU* cu, UInt absZOrderIdx, UInt depth);
+
//! \}
#endif // ifndef __TCOMSAMPLEADAPTIVEOFFSET__
diff -r 78f36991d730 -r 08631e01e5c0 source/Lib/TLibEncoder/TEncSampleAdaptiveOffset.cpp
--- a/source/Lib/TLibEncoder/TEncSampleAdaptiveOffset.cpp Thu Aug 22 15:16:40 2013 +0800
+++ b/source/Lib/TLibEncoder/TEncSampleAdaptiveOffset.cpp Thu Aug 22 15:17:29 2013 +0800
@@ -45,22 +45,25 @@
//! \{
TEncSampleAdaptiveOffset::TEncSampleAdaptiveOffset()
+ : m_entropyCoder(NULL)
+ , m_rdSbacCoders(NULL)
+ , m_rdGoOnSbacCoder(NULL)
+ , m_binCoderCABAC(NULL)
+ , m_count(NULL)
+ , m_offset(NULL)
+ , m_offsetOrg(NULL)
+ , m_countPreDblk(NULL)
+ , m_offsetOrgPreDblk(NULL)
+ , m_rate(NULL)
+ , m_dist(NULL)
+ , m_cost(NULL)
+ , m_costPartBest(NULL)
+ , m_distOrg(NULL)
+ , m_typePartBest(NULL)
+ , lumaLambda(0.)
+ , chromaLambd(0.)
+ , depth(0)
{
- m_entropyCoder = NULL;
- m_rdSbacCoders = NULL;
- m_rdGoOnSbacCoder = NULL;
- m_binCoderCABAC = NULL;
- m_count = NULL;
- m_offset = NULL;
- m_offsetOrg = NULL;
- m_countPreDblk = NULL;
- m_offsetOrgPreDblk = NULL;
- m_rate = NULL;
- m_dist = NULL;
- m_cost = NULL;
- m_costPartBest = NULL;
- m_distOrg = NULL;
- m_typePartBest = NULL;
m_depthSaoRate[0][0] = 0;
m_depthSaoRate[0][1] = 0;
m_depthSaoRate[0][2] = 0;
@@ -69,6 +72,11 @@
m_depthSaoRate[1][1] = 0;
m_depthSaoRate[1][2] = 0;
m_depthSaoRate[1][3] = 0;
+
+ m_saoBitIncreaseY = max(X265_DEPTH - 10, 0);
+ m_saoBitIncreaseC = max(X265_DEPTH - 10, 0);
+ m_offsetThY = 1 << min(X265_DEPTH - 5, 5);
+ m_offsetThC = 1 << min(X265_DEPTH - 5, 5);
}
TEncSampleAdaptiveOffset::~TEncSampleAdaptiveOffset()
@@ -850,19 +858,11 @@
}
}
-/** Calculate SAO statistics for current LCU
+/** Calculate SAO statistics for current LCU without non-crossing slice
* \param addr, partIdx, yCbCr
*/
Void TEncSampleAdaptiveOffset::calcSaoStatsCu(Int addr, Int partIdx, Int yCbCr)
{
- calcSaoStatsCuOrg(addr, partIdx, yCbCr);
-}
-
-/** Calculate SAO statistics for current LCU without non-crossing slice
- * \param addr, partIdx, yCbCr
- */
-Void TEncSampleAdaptiveOffset::calcSaoStatsCuOrg(Int addr, Int partIdx, Int yCbCr)
-{
Int x, y;
TComDataCU *pTmpCu = m_pic->getCU(addr);
TComSPS *pTmpSPS = m_pic->getSlice()->getSPS();
@@ -1536,31 +1536,23 @@
* \param dLambdaLuma
* \param lambdaChroma
*/
-Void TEncSampleAdaptiveOffset::SAOProcess(SAOParam *saoParam, Double lambdaLuma, Double lambdaChroma, Int depth)
+Void TEncSampleAdaptiveOffset::SAOProcess(SAOParam *saoParam)
{
- m_saoBitIncreaseY = max(X265_DEPTH - 10, 0);
- m_saoBitIncreaseC = max(X265_DEPTH - 10, 0);
- m_offsetThY = 1 << min(X265_DEPTH - 5, 5);
- m_offsetThC = 1 << min(X265_DEPTH - 5, 5);
- resetSAOParam(saoParam);
- if (!m_saoLcuBasedOptimization || !m_saoLcuBoundary)
- {
- resetStats();
- }
- Double costFinal = 0;
if (m_saoLcuBasedOptimization)
{
- rdoSaoUnitAll(saoParam, lambdaLuma, lambdaChroma, depth);
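+ // Should never get here: in this patch the frame encoder only calls SAOProcess() when LCU-based optimization is off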
+ assert(0);
+
+ rdoSaoUnitAll(saoParam, lumaLambda, chromaLambd, depth);
}
else
{
+ Double costFinal = 0;
saoParam->bSaoFlag[0] = 1;
saoParam->bSaoFlag[1] = 0;
costFinal = 0;
- Double lambdaRdo = lambdaLuma;
- resetStats();
getSaoStats(saoParam->saoPart[0], 0);
- runQuadTreeDecision(saoParam->saoPart[0], 0, costFinal, m_maxSplitLevel, lambdaRdo, 0);
+ runQuadTreeDecision(saoParam->saoPart[0], 0, costFinal, m_maxSplitLevel, lumaLambda, 0);
saoParam->bSaoFlag[0] = costFinal < 0 ? 1 : 0;
if (saoParam->bSaoFlag[0])
{
@@ -1897,6 +1889,199 @@
}
}
+Void TEncSampleAdaptiveOffset::rdoSaoUnitRowInit(SAOParam *saoParam) // setup run before the row-wise SAO RDO (rdoSaoUnitRow)
+{
+ saoParam->bSaoFlag[0] = true; // luma SAO enabled by default
+ saoParam->bSaoFlag[1] = true; // chroma SAO (both Cb and Cr) enabled by default
+ saoParam->oneUnitFlag[0] = false;
+ saoParam->oneUnitFlag[1] = false;
+ saoParam->oneUnitFlag[2] = false;
+ // restart the "LCU without SAO" counters that rdoSaoUnitRowEnd() turns into rates
+ numNoSao[0] = 0; // Luma
+ numNoSao[1] = 0; // Chroma
+ if (depth > 0 && m_depthSaoRate[0][depth - 1] > SAO_ENCODING_RATE) // luma: rate recorded for depth - 1 exceeds the threshold
+ {
+ saoParam->bSaoFlag[0] = false;
+ }
+ if (depth > 0 && m_depthSaoRate[1][depth - 1] > SAO_ENCODING_RATE_CHROMA) // chroma: same test with its own threshold
+ {
+ saoParam->bSaoFlag[1] = false;
+ }
+}
+
+Void TEncSampleAdaptiveOffset::rdoSaoUnitRowEnd(SAOParam *saoParam, int numlcus)
+{
+ // Record, for the current depth, the fraction of LCUs left without SAO (1.0 when the component's SAO flag is off)
+ if (!saoParam->bSaoFlag[0])
+ {
+ m_depthSaoRate[0][depth] = 1.0;
+ }
+ else
+ {
+ m_depthSaoRate[0][depth] = numNoSao[0] / ((Double)numlcus); // luma: counter is incremented by 1 per LCU
+ }
+ if (!saoParam->bSaoFlag[1])
+ {
+ m_depthSaoRate[1][depth] = 1.0;
+ }
+ else
+ {
+ m_depthSaoRate[1][depth] = numNoSao[1] / ((Double)numlcus * 2); // chroma: counter is incremented by 2 per LCU in rdoSaoUnitRow
+ }
+}
+
+Void TEncSampleAdaptiveOffset::rdoSaoUnitRow(SAOParam *saoParam, Int idxY) // chooses SAO parameters (new / merge-left / merge-up) for every LCU of row idxY
+{
+ Int idxX;
+ Int frameWidthInCU = saoParam->numCuInWidth;
+ Int j, k;
+ Int addr = 0;
+ Int addrUp = -1;
+ Int addrLeft = -1;
+ Int compIdx = 0;
+ SaoLcuParam mergeSaoParam[3][2]; // [component][0: merge-left candidate, 1: merge-up candidate]
+ Double compDistortion[3]; // [0]: new parameters, [mergeUp + 1]: merge candidates (see the cost computations below)
+ // walk the LCUs of this row from left to right
+ {
+ for (idxX = 0; idxX < frameWidthInCU; idxX++)
+ {
+ addr = idxX + frameWidthInCU * idxY;
+ addrUp = addr < frameWidthInCU ? -1 : idxX + frameWidthInCU * (idxY - 1); // -1 when there is no row above
+ addrLeft = idxX == 0 ? -1 : idxX - 1 + frameWidthInCU * idxY; // -1 for the first LCU of the row
+ Int allowMergeLeft = 1;
+ Int allowMergeUp = 1;
+ UInt rate;
+ Double bestCost, mergeCost;
+ if (idxX == 0)
+ {
+ allowMergeLeft = 0;
+ }
+ if (idxY == 0)
+ {
+ allowMergeUp = 0;
+ }
+
+ compDistortion[0] = 0;
+ compDistortion[1] = 0;
+ compDistortion[2] = 0;
+ m_rdGoOnSbacCoder->load(m_rdSbacCoders[0][CI_CURR_BEST]);
+ if (allowMergeLeft)
+ {
+ m_entropyCoder->m_pcEntropyCoderIf->codeSaoMerge(0);
+ }
+ if (allowMergeUp)
+ {
+ m_entropyCoder->m_pcEntropyCoderIf->codeSaoMerge(0);
+ }
+ m_rdGoOnSbacCoder->store(m_rdSbacCoders[0][CI_TEMP_BEST]);
+ // reset stats Y, Cb, Cr
+ for (compIdx = 0; compIdx < 3; compIdx++)
+ {
+ for (j = 0; j < MAX_NUM_SAO_TYPE; j++)
+ {
+ for (k = 0; k < MAX_NUM_SAO_CLASS; k++)
+ {
+ m_offset[compIdx][j][k] = 0;
+ if (m_saoLcuBasedOptimization && m_saoLcuBoundary) // start from the statistics stored in the *PreDblk arrays
+ {
+ m_count[compIdx][j][k] = m_countPreDblk[addr][compIdx][j][k];
+ m_offsetOrg[compIdx][j][k] = m_offsetOrgPreDblk[addr][compIdx][j][k];
+ }
+ else
+ {
+ m_count[compIdx][j][k] = 0;
+ m_offsetOrg[compIdx][j][k] = 0;
+ }
+ }
+ }
+
+ saoParam->saoLcuParam[compIdx][addr].typeIdx = -1; // default; typeIdx == -1 is counted as "no SAO" further down
+ saoParam->saoLcuParam[compIdx][addr].mergeUpFlag = 0;
+ saoParam->saoLcuParam[compIdx][addr].mergeLeftFlag = 0;
+ saoParam->saoLcuParam[compIdx][addr].subTypeIdx = 0;
+ if ((compIdx == 0 && saoParam->bSaoFlag[0]) || (compIdx > 0 && saoParam->bSaoFlag[1])) // bSaoFlag[0] gates luma, bSaoFlag[1] gates both chroma components
+ {
+ calcSaoStatsCu(addr, compIdx, compIdx);
+ }
+ }
+
+ saoComponentParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft, 0, lumaLambda, &mergeSaoParam[0][0], &compDistortion[0]);
+ sao2ChromaParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft, chromaLambd, &mergeSaoParam[1][0], &mergeSaoParam[2][0], &compDistortion[0]);
+ if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
+ {
+ // Cost of new SAO_params
+ m_rdGoOnSbacCoder->load(m_rdSbacCoders[0][CI_CURR_BEST]);
+ m_rdGoOnSbacCoder->resetBits();
+ if (allowMergeLeft)
+ {
+ m_entropyCoder->m_pcEntropyCoderIf->codeSaoMerge(0);
+ }
+ if (allowMergeUp)
+ {
+ m_entropyCoder->m_pcEntropyCoderIf->codeSaoMerge(0);
+ }
+ for (compIdx = 0; compIdx < 3; compIdx++)
+ {
+ if ((compIdx == 0 && saoParam->bSaoFlag[0]) || (compIdx > 0 && saoParam->bSaoFlag[1]))
+ {
+ m_entropyCoder->encodeSaoOffset(&saoParam->saoLcuParam[compIdx][addr], compIdx);
+ }
+ }
+
+ rate = m_entropyCoder->getNumberOfWrittenBits();
+ bestCost = compDistortion[0] + (Double)rate; // baseline cost: freshly estimated parameters
+ m_rdGoOnSbacCoder->store(m_rdSbacCoders[0][CI_TEMP_BEST]);
+
+ // Cost of Merge
+ for (Int mergeUp = 0; mergeUp < 2; ++mergeUp) // mergeUp == 0 tries merge-left, mergeUp == 1 tries merge-up
+ {
+ if ((allowMergeLeft && (mergeUp == 0)) || (allowMergeUp && (mergeUp == 1)))
+ {
+ m_rdGoOnSbacCoder->load(m_rdSbacCoders[0][CI_CURR_BEST]);
+ m_rdGoOnSbacCoder->resetBits();
+ if (allowMergeLeft)
+ {
+ m_entropyCoder->m_pcEntropyCoderIf->codeSaoMerge(1 - mergeUp);
+ }
+ if (allowMergeUp && (mergeUp == 1))
+ {
+ m_entropyCoder->m_pcEntropyCoderIf->codeSaoMerge(1);
+ }
+
+ rate = m_entropyCoder->getNumberOfWrittenBits();
+ mergeCost = compDistortion[mergeUp + 1] + (Double)rate;
+ if (mergeCost < bestCost) // merge candidate wins: adopt its parameters and coder state
+ {
+ bestCost = mergeCost;
+ m_rdGoOnSbacCoder->store(m_rdSbacCoders[0][CI_TEMP_BEST]);
+ for (compIdx = 0; compIdx < 3; compIdx++)
+ {
+ mergeSaoParam[compIdx][mergeUp].mergeLeftFlag = 1 - mergeUp;
+ mergeSaoParam[compIdx][mergeUp].mergeUpFlag = mergeUp;
+ if ((compIdx == 0 && saoParam->bSaoFlag[0]) || (compIdx > 0 && saoParam->bSaoFlag[1]))
+ {
+ copySaoUnit(&saoParam->saoLcuParam[compIdx][addr], &mergeSaoParam[compIdx][mergeUp]);
+ }
+ }
+ }
+ }
+ }
+
+ if (saoParam->saoLcuParam[0][addr].typeIdx == -1)
+ {
+ numNoSao[0]++;
+ }
+ if (saoParam->saoLcuParam[1][addr].typeIdx == -1)
+ {
+ numNoSao[1] += 2; // only component 1 is tested, but the counter advances by 2; rdoSaoUnitRowEnd divides by numlcus * 2
+ }
+ m_rdGoOnSbacCoder->load(m_rdSbacCoders[0][CI_TEMP_BEST]); // commit the winning coder state ...
+ m_rdGoOnSbacCoder->store(m_rdSbacCoders[0][CI_CURR_BEST]); // ... as the CI_CURR_BEST state loaded for the next LCU
+ }
+ }
+ }
+}
+
/** rate distortion optimization of SAO unit
* \param saoParam SAO parameters
* \param addr address
diff -r 78f36991d730 -r 08631e01e5c0 source/Lib/TLibEncoder/TEncSampleAdaptiveOffset.h
--- a/source/Lib/TLibEncoder/TEncSampleAdaptiveOffset.h Thu Aug 22 15:16:40 2013 +0800
+++ b/source/Lib/TLibEncoder/TEncSampleAdaptiveOffset.h Thu Aug 22 15:17:29 2013 +0800
@@ -78,6 +78,10 @@
Double m_depthSaoRate[2][4];
public:
+ double lumaLambda;
+ double chromaLambd;
+ int depth;
+ Int numNoSao[2];
TEncSampleAdaptiveOffset();
virtual ~TEncSampleAdaptiveOffset();
@@ -85,7 +89,7 @@
Void startSaoEnc(TComPic* pic, TEncEntropy* entropyCoder, TEncSbac* rdGoOnSbacCoder);
Void endSaoEnc();
Void resetStats();
- Void SAOProcess(SAOParam *saoParam, Double lambda, Double lambdaChroma, Int depth);
+ Void SAOProcess(SAOParam *saoParam);
Void runQuadTreeDecision(SAOQTPart *psQTPart, Int partIdx, Double &costFinal, Int maxLevel, Double lambda, Int yCbCr);
Void rdoSaoOnePart(SAOQTPart *psQTPart, Int partIdx, Double lambda, Int yCbCr);
@@ -94,7 +98,6 @@
Void getSaoStats(SAOQTPart *psQTPart, Int yCbCr);
Void calcSaoStatsCu(Int addr, Int partIdx, Int yCbCr);
Void calcSaoStatsBlock(Pel* recStart, Pel* orgStart, Int stride, Int64** stats, Int64** counts, UInt width, UInt height, Bool* bBorderAvail, Int yCbCr);
- Void calcSaoStatsCuOrg(Int addr, Int partIdx, Int yCbCr);
Void calcSaoStatsRowCus_BeforeDblk(TComPic* pic, Int idxY);
Void destroyEncBuffer();
Void createEncBuffer();
@@ -109,6 +112,10 @@
Void setMaxNumOffsetsPerPic(Int val) { m_maxNumOffsetsPerPic = val; }
Int getMaxNumOffsetsPerPic() { return m_maxNumOffsetsPerPic; }
+
+ Void rdoSaoUnitRowInit(SAOParam *saoParam);
+ Void rdoSaoUnitRowEnd(SAOParam *saoParam, int numlcus);
+ Void rdoSaoUnitRow(SAOParam *saoParam, Int idxY);
};
//! \}
diff -r 78f36991d730 -r 08631e01e5c0 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Thu Aug 22 15:16:40 2013 +0800
+++ b/source/encoder/frameencoder.cpp Thu Aug 22 15:17:29 2013 +0800
@@ -88,8 +88,6 @@
m_numRows = numRows;
row_delay = (m_cfg->param.saoLcuBasedOptimization && m_cfg->param.saoLcuBoundary) ? 2 : 1;;
- m_frameFilter.init(top, numRows);
-
m_rows = new CTURow[m_numRows];
for (int i = 0; i < m_numRows; ++i)
{
@@ -102,6 +100,8 @@
m_pool = NULL;
}
+ m_frameFilter.init(top, numRows, getEntropyCoder(0), getRDGoOnSbacCoder(0));
+
// initialize SPS
top->xInitSPS(&m_sps);
@@ -352,7 +352,7 @@
setCrDistortionWeight(weight);
// for RDOQ
- setQPLambda(qp, lambda, lambda / weight);
+ setQPLambda(qp, lambda, lambda / weight, slice->getDepth());
// For SAO
slice->setLambda(lambda, lambda / weight);
@@ -529,26 +529,22 @@
}
/* use the main bitstream buffer for storing the marshaled picture */
- entropyCoder->setBitstream(NULL);
-
if (m_sps.getUseSAO())
{
- // set entropy coder for RD
- entropyCoder->setEntropyCoder(&m_sbacCoder, slice);
- entropyCoder->resetEntropy();
- entropyCoder->setBitstream(&m_bitCounter);
+ SAOParam* saoParam = pic->getPicSym()->getSaoParam();
- // CHECK_ME: I think the SAO uses a temp Sbac only, so I always use [0], am I right?
- getSAO()->startSaoEnc(pic, entropyCoder, getRDGoOnSbacCoder(0));
-
- SAOParam* saoParam = pic->getPicSym()->getSaoParam();
- getSAO()->SAOProcess(saoParam, slice->getLambdaLuma(), slice->getLambdaChroma(), slice->getDepth());
- getSAO()->endSaoEnc();
- PCMLFDisableProcess(pic);
+ if (!getSAO()->getSaoLcuBasedOptimization())
+ {
+ getSAO()->SAOProcess(saoParam);
+ getSAO()->endSaoEnc();
+ PCMLFDisableProcess(pic);
+ }
slice->setSaoEnabledFlag((saoParam->bSaoFlag[0] == 1) ? true : false);
}
+ entropyCoder->setBitstream(NULL);
+
// Reconstruction slice
slice->setNextSlice(true);
determineSliceBounds(pic);
diff -r 78f36991d730 -r 08631e01e5c0 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Thu Aug 22 15:16:40 2013 +0800
+++ b/source/encoder/frameencoder.h Thu Aug 22 15:17:29 2013 +0800
@@ -75,12 +75,15 @@
}
}
- void setQPLambda(Int QP, double lumaLambda, double chromaLambda)
+ void setQPLambda(Int QP, double lumaLambda, double chromaLambda, int depth)
{
for (int i = 0; i < m_numRows; i++)
{
m_rows[i].m_search.setQPLambda(QP, lumaLambda, chromaLambda);
}
+ m_frameFilter.m_sao.lumaLambda = lumaLambda; // cached on the SAO encoder, which no longer receives these as SAOProcess() arguments
+ m_frameFilter.m_sao.chromaLambd = chromaLambda; // NOTE: the member really is spelled "chromaLambd" in TEncSampleAdaptiveOffset.h
+ m_frameFilter.m_sao.depth = depth; // read by rdoSaoUnitRowInit()/rdoSaoUnitRowEnd() to index m_depthSaoRate
}
void setCbDistortionWeight(double weight)
diff -r 78f36991d730 -r 08631e01e5c0 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Thu Aug 22 15:16:40 2013 +0800
+++ b/source/encoder/framefilter.cpp Thu Aug 22 15:17:29 2013 +0800
@@ -37,6 +37,8 @@
, m_cfg(NULL)
, m_pic(NULL)
, active_lft(FALSE)
+ , m_entropyCoder(NULL)
+ , m_rdGoOnSbacCoder(NULL)
{}
void FrameFilter::destroy()
@@ -73,11 +75,15 @@
return false;
}
-void FrameFilter::init(TEncTop *top, int numRows)
+void FrameFilter::init(TEncTop *top, int numRows, TEncEntropy* entropyCoder, TEncSbac* rdGoOnSbacCoder)
{
m_cfg = top;
m_numRows = numRows;
+ // NOTE: used by SAO only; do NOT use them before the first row has finished
+ m_entropyCoder = entropyCoder;
+ m_rdGoOnSbacCoder = rdGoOnSbacCoder;
+
if (top->param.bEnableLoopFilter)
{
m_loopFilter.create(g_maxCUDepth);
@@ -99,9 +105,12 @@
active_lft = FALSE;
if (m_cfg->param.bEnableLoopFilter)
{
- if (m_cfg->param.saoLcuBasedOptimization && m_cfg->param.saoLcuBoundary)
- m_sao.resetStats();
+ m_sao.resetStats();
m_sao.createPicSaoInfo(pic);
+
+ SAOParam* saoParam = pic->getPicSym()->getSaoParam();
+ m_sao.resetSAOParam(saoParam);
+ m_sao.rdoSaoUnitRowInit(saoParam);
}
if (m_cfg->param.bEnableLoopFilter && m_pool && m_cfg->param.bEnableWavefront)
@@ -139,6 +148,13 @@
// Called by worker threads
+ // NOTE: We only get here when both the loop filter and SAO are active, and row 0 has always finished by then, so it is safe to reuse row[0]'s data
+ if (row == 0)
+ {
+ // CHECK_ME: I think the SAO uses a temp Sbac only, so I always use [0], am I right?
+ m_sao.startSaoEnc(m_pic, m_entropyCoder, m_rdGoOnSbacCoder);
+ }
+
const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
const uint32_t lineStartCUAddr = row * numCols;
@@ -167,9 +183,61 @@
m_loopFilter.loopFilterCU(cu_prev, EDGE_HOR);
}
+ // SAO
+ SAOParam* saoParam = m_pic->getPicSym()->getSaoParam();
+ if (m_sao.getSaoLcuBasedOptimization())
+ {
+ m_sao.rdoSaoUnitRow(saoParam, row);
+
+ // NOTE: Delay SAO by one row because the SAO decision for the next row needs the pixels of the row above; is this an HM bug?
+ if (row > 0)
+ {
+ // NOTE: these flags are not used in this mode
+ assert(saoParam->oneUnitFlag[0] == false);
+ assert(saoParam->oneUnitFlag[1] == false);
+ assert(saoParam->oneUnitFlag[2] == false);
+
+ if (saoParam->bSaoFlag[0])
+ {
+ m_sao.processSaoUnitRow(saoParam->saoLcuParam[0], row - 1, 0);
+ }
+ if (saoParam->bSaoFlag[1])
+ {
+ m_sao.processSaoUnitRow(saoParam->saoLcuParam[1], row - 1, 1);
+ m_sao.processSaoUnitRow(saoParam->saoLcuParam[2], row - 1, 2);
+ }
+
+ // TODO: this code is NOT VERIFIED because TransformSkip and PCM mode have some bugs and are never active!
+ Bool bPCMFilter = (m_pic->getSlice()->getSPS()->getUsePCM() && m_pic->getSlice()->getSPS()->getPCMFilterDisableFlag()) ? true : false;
+ if (bPCMFilter || m_pic->getSlice()->getPPS()->getTransquantBypassEnableFlag())
+ {
+ for (UInt col = 0; col < numCols; col++)
+ {
+ const uint32_t cuAddr = lineStartCUAddr + col;
+ TComDataCU* cu = m_pic->getCU(cuAddr);
+
+ xPCMCURestoration(cu, 0, 0);
+ }
+ }
+ }
+ }
+
// this row of CTUs has been encoded
if (row == m_numRows - 1)
{
+ m_sao.rdoSaoUnitRowEnd(saoParam, m_pic->getNumCUsInFrame());
+
+ // Process Last row of SAO
+ if (saoParam->bSaoFlag[0])
+ {
+ m_sao.processSaoUnitRow(saoParam->saoLcuParam[0], row, 0);
+ }
+ if (saoParam->bSaoFlag[1])
+ {
+ m_sao.processSaoUnitRow(saoParam->saoLcuParam[1], row, 1);
+ m_sao.processSaoUnitRow(saoParam->saoLcuParam[2], row, 2);
+ }
+
m_completionEvent.trigger();
}
}
diff -r 78f36991d730 -r 08631e01e5c0 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Thu Aug 22 15:16:40 2013 +0800
+++ b/source/encoder/framefilter.h Thu Aug 22 15:17:29 2013 +0800
@@ -48,7 +48,7 @@
virtual ~FrameFilter() {}
- void init(TEncTop *top, int numRows);
+ void init(TEncTop *top, int numRows, TEncEntropy* entropyCoder, TEncSbac* rdGoOnSbacCoder);
void destroy();
@@ -72,6 +72,8 @@
TComLoopFilter m_loopFilter;
TEncSampleAdaptiveOffset m_sao;
+ TEncEntropy* m_entropyCoder;
+ TEncSbac* m_rdGoOnSbacCoder;
int m_numRows;
// TODO: if you want thread priority logic, add col here
More information about the x265-devel
mailing list