[x265] [PATCH 1 of 2] improve performance by full row process
Min Chen
chenm003 at 163.com
Thu Feb 4 06:29:50 CET 2016
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1454563778 -28800
# Node ID ad8ebeffdda44378dd93b787215a937a26be980e
# Parent dc62b47dd0d98f732165345883edac55320baec1
improve performance by full row process
---
source/encoder/framefilter.cpp | 197 +++++++++++++++++++++++++++++++++--
source/encoder/framefilter.h | 4 +
source/encoder/sao.cpp | 224 ++++++++++++++++++++++++++++++++++++++++
source/encoder/sao.h | 4 +-
4 files changed, 418 insertions(+), 11 deletions(-)
diff -r dc62b47dd0d9 -r ad8ebeffdda4 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Mon Jan 25 14:59:50 2016 +0530
+++ b/source/encoder/framefilter.cpp Thu Feb 04 13:29:38 2016 +0800
@@ -174,6 +174,22 @@
restoreOrigLosslessYuv(cu, frame, absPartIdx);
}
+void FrameFilter::ParallelFilter::processSaoPcmRow(int startCol)
+{
+ // Sample restoration is only run when the PPS enables transquant bypass; it covers CTU columns [startCol, m_numCols) of this row
+ if (!m_encData->m_slice->m_pps->bTransquantBypassEnabled)
+ return;
+
+ const CUGeom* geomTable = m_frameFilter->m_frameEncoder->m_cuGeoms;
+ const uint32_t* geomIndex = m_frameFilter->m_frameEncoder->m_ctuGeomMap;
+
+ for (int x = startCol; x < m_frameFilter->m_numCols; x++)
+ {
+ const uint32_t ctuAddr = m_rowAddr + x;
+ origCUSampleRestoration(m_encData->getPicCTU(ctuAddr), geomTable[geomIndex[ctuAddr]], *m_frameFilter->m_frame);
+ }
+}
+
void FrameFilter::ParallelFilter::copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col)
{
// Copy SAO Top Reference Pixels
@@ -182,7 +198,7 @@
// Luma
memcpy(&m_sao.m_tmpU[0][col * ctuWidth], recY, ctuWidth * sizeof(pixel));
- X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer beyond bound write detected");
+ X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer write beyond bound detected");
// Chroma
if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
@@ -194,7 +210,32 @@
memcpy(&m_sao.m_tmpU[1][col * ctuWidth], recU, ctuWidth * sizeof(pixel));
memcpy(&m_sao.m_tmpU[2][col * ctuWidth], recV, ctuWidth * sizeof(pixel));
- X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer beyond bound write detected");
+ X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer write beyond bound detected");
+ }
+}
+
+void FrameFilter::ParallelFilter::copySaoAboveRefRow(PicYuv* reconPic, uint32_t cuAddr, int col)
+{
+ // Copy the SAO top reference pixels of every CTU from 'col' to the end of the row into m_sao.m_tmpU, one memcpy per plane
+ int ctuWidth = g_maxCUSize;
+ const pixel* recY = reconPic->getPlaneAddr(0, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_stride); // one line above the CTU; when m_rowAddr is 0 the CTU's own first line is used
+ const int cntCols = (m_frameFilter->m_numCols - col); // number of CTU columns copied in this call
+
+ // Luma
+ memcpy(&m_sao.m_tmpU[0][col * ctuWidth], recY, cntCols * ctuWidth * sizeof(pixel));
+ X265_CHECK(col * ctuWidth + cntCols * ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer write beyond bound detected");
+
+ // Chroma (skipped when internalCsp is X265_CSP_I400)
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
+ {
+ ctuWidth >>= m_sao.m_hChromaShift; // CTU width in chroma samples
+
+ const pixel* recU = reconPic->getPlaneAddr(1, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_strideC);
+ const pixel* recV = reconPic->getPlaneAddr(2, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_strideC);
+ memcpy(&m_sao.m_tmpU[1][col * ctuWidth], recU, cntCols * ctuWidth * sizeof(pixel));
+ memcpy(&m_sao.m_tmpU[2][col * ctuWidth], recV, cntCols * ctuWidth * sizeof(pixel));
+
+ X265_CHECK(col * ctuWidth + cntCols * ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer write beyond bound detected");
}
}
@@ -243,7 +284,7 @@
const intptr_t stride = reconPic->m_stride;
const intptr_t strideC = reconPic->m_strideC;
pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr);
- // // MUST BE check I400 since m_picOrg uninitialize in that case
+ // MUST BE check I400 since m_picOrg uninitialize in that case
pixel *pixU = (m_frameFilter->m_param->internalCsp != X265_CSP_I400) ? reconPic->getCbAddr(lineStartCUAddr) : NULL;
pixel *pixV = (m_frameFilter->m_param->internalCsp != X265_CSP_I400) ? reconPic->getCrAddr(lineStartCUAddr) : NULL;
int copySizeY = realW;
@@ -312,6 +353,79 @@
}
}
+void FrameFilter::ParallelFilter::processPostRow() const
+{
+ // Pad the reconstructed picture around this CTU row (left/right always, top/bottom on the first/last row), then mark the row complete
+ PicYuv* recon = m_frameFilter->m_frame->m_reconPic;
+ const bool hasChroma = m_frameFilter->m_param->internalCsp != X265_CSP_I400;
+
+ const uint32_t padXLuma = recon->m_lumaMarginX;
+ const uint32_t padYLuma = recon->m_lumaMarginY;
+ const uint32_t padXChroma = recon->m_chromaMarginX;
+ const uint32_t padYChroma = recon->m_chromaMarginY;
+ const int shiftX = recon->m_hChromaShift;
+ const int shiftY = recon->m_vChromaShift;
+ const intptr_t strideY = recon->m_stride;
+ const intptr_t strideC = recon->m_strideC;
+ const int rowHeight = getCUHeight();
+
+ // Chroma addresses are only queried when the colour space is not I400; otherwise they stay NULL and are never used
+ pixel* rowY = recon->getLumaAddr(m_rowAddr);
+ pixel* rowU = hasChroma ? recon->getCbAddr(m_rowAddr) : NULL;
+ pixel* rowV = hasChroma ? recon->getCrAddr(m_rowAddr) : NULL;
+
+ // Left and right borders
+ primitives.extendRowBorder(rowY, strideY, recon->m_picWidth, rowHeight, padXLuma);
+ if (hasChroma)
+ {
+ primitives.extendRowBorder(rowU, strideC, recon->m_picWidth >> shiftX, rowHeight >> shiftY, padXChroma);
+ primitives.extendRowBorder(rowV, strideC, recon->m_picWidth >> shiftX, rowHeight >> shiftY, padXChroma);
+ }
+
+ // Top border: row 0 copies its first line (starting at the left margin) into each margin line above it
+ if (m_row == 0)
+ {
+ pixel* srcY = rowY - padXLuma;
+ for (uint32_t n = 0; n < padYLuma; n++)
+ memcpy(srcY - (n + 1) * strideY, srcY, strideY * sizeof(pixel));
+
+ if (hasChroma)
+ {
+ pixel* srcU = rowU - padXChroma;
+ pixel* srcV = rowV - padXChroma;
+
+ for (uint32_t n = 0; n < padYChroma; n++)
+ {
+ memcpy(srcU - (n + 1) * strideC, srcU, strideC * sizeof(pixel));
+ memcpy(srcV - (n + 1) * strideC, srcV, strideC * sizeof(pixel));
+ }
+ }
+ }
+
+ // Bottom border: the last row copies its last line into each margin line below it
+ if (m_row == m_frameFilter->m_numRows - 1)
+ {
+ pixel* srcY = rowY - padXLuma + (rowHeight - 1) * strideY;
+ for (uint32_t n = 0; n < padYLuma; n++)
+ memcpy(srcY + (n + 1) * strideY, srcY, strideY * sizeof(pixel));
+
+ if (hasChroma)
+ {
+ pixel* srcU = rowU - padXChroma + ((rowHeight >> shiftY) - 1) * strideC;
+ pixel* srcV = rowV - padXChroma + ((rowHeight >> shiftY) - 1) * strideC;
+
+ for (uint32_t n = 0; n < padYChroma; n++)
+ {
+ memcpy(srcU + (n + 1) * strideC, srcU, strideC * sizeof(pixel));
+ memcpy(srcV + (n + 1) * strideC, srcV, strideC * sizeof(pixel));
+ }
+ }
+ }
+
+ // Publish the index of the last column as this row's finished-CU cursor
+ m_frameFilter->m_frame->m_reconColCount[m_row].set(m_frameFilter->m_numCols - 1);
+}
+
// NOTE: Single Threading only
void FrameFilter::ParallelFilter::processTasks(int /*workerThreadId*/)
{
@@ -433,6 +547,75 @@
}
}
+void FrameFilter::ParallelFilter::processTasksRow(int /*workerThreadId*/) // Filters CTU columns [m_lastCol, m_numCols) of this row in one call: deblock, SAO decision, SAO apply and border extension for the previous row
+{
+ SAOParam* saoParam = m_encData->m_saoParam;
+ const CUGeom* cuGeoms = m_frameFilter->m_frameEncoder->m_cuGeoms;
+ const uint32_t* ctuGeomMap = m_frameFilter->m_frameEncoder->m_ctuGeomMap;
+ PicYuv* reconPic = m_encData->m_reconPic;
+ const int colStart = m_lastCol.get(); // first column handled by this call
+ const int numCols = m_frameFilter->m_numCols;
+
+ // Avoid threading conflict: nothing to do once the start column is at or past the end of the row
+ if (colStart >= numCols)
+ return;
+
+ // Previous row MUST be finished before this point (NOTE(review): not verified inside this function - confirm the caller guarantees it)
+ if (m_frameFilter->m_param->bEnableLoopFilter)
+ {
+ for (uint32_t col = (uint32_t)colStart; col < (uint32_t)numCols; col++)
+ {
+ const uint32_t cuAddr = m_rowAddr + col;
+
+ const CUData* ctu = m_encData->getPicCTU(cuAddr);
+ deblockCTU(ctu, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_VER); // vertical edges of the current CTU
+
+ if (col >= 1) // horizontal edges run one CTU behind the vertical pass
+ {
+ const CUData* ctuPrev = m_encData->getPicCTU(cuAddr - 1);
+ deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);
+ }
+ }
+ // Process last column: its horizontal edges are not covered by the one-behind pass above
+ {
+ const uint32_t cuAddr = m_rowAddr + numCols - 1;
+ const CUData* ctuPrev = m_encData->getPicCTU(cuAddr);
+ deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_HOR);
+ }
+ }
+
+ if (m_frameFilter->m_param->bEnableSAO)
+ {
+ // Save SAO reference pixels, starting one column before colStart (clamped to 0)
+ copySaoAboveRefRow(reconPic, m_rowAddr + X265_MAX(0, colStart - 1), X265_MAX(0, colStart - 1));
+
+ m_sao.rdoSaoUnitRow(saoParam, m_rowAddr, X265_MAX(0, colStart - 2)); // SAO parameter decision, starting two columns before colStart
+
+ // Process Previous Row SAO CU
+ if (m_row >= 1)
+ {
+ const int saoProcessStartCol = X265_MAX(0, colStart - 3);
+ const int planes = (m_frameFilter->m_param->internalCsp != X265_CSP_I400) ? 3 : 1; // same I400 guard as the other chroma accesses in this file
+
+ // Must delay 1 row to avoid thread data race conflict; chroma planes are skipped for I400
+ for (int plane = 0; plane < planes; plane++)
+ m_prevRow->m_sao.processSaoUnitRow(saoParam->ctuParam[plane], m_prevRow->m_row, saoProcessStartCol, plane);
+ m_prevRow->processSaoPcmRow(saoProcessStartCol);
+ }
+ }
+
+ if (m_row >= 1)
+ {
+ // TODO: process current row when SAO disabled
+ m_prevRow->processPostRow();
+ }
+
+ // Setting column sync counter
+ if (m_row >= 1)
+ m_frameFilter->m_frame->m_reconColCount[m_row - 1].set(numCols - 1); // REMOVE soon
+ m_lastDeblocked.set(numCols); // mark every column of this row as deblocked
+}
+
void FrameFilter::processRow(int row)
{
ProfileScopeEvent(filterCTURow);
@@ -461,7 +644,7 @@
X265_CHECK((row < 1) || m_parallelFilter[row - 1].m_lastDeblocked.get() == m_numCols, "previous row not finish");
m_parallelFilter[row].m_allowedCol.set(m_numCols);
- m_parallelFilter[row].processTasks(-1);
+ m_parallelFilter[row].processTasksRow(-1);
if (row == m_numRows - 1)
{
@@ -480,11 +663,7 @@
}
// Process border extension on last row
- for(int col = 0; col < m_numCols; col++)
- {
- // m_reconColCount will be set in processPostCu()
- m_parallelFilter[row].processPostCu(col);
- }
+ m_parallelFilter[row].processPostRow();
}
}
diff -r dc62b47dd0d9 -r ad8ebeffdda4 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/encoder/framefilter.h Thu Feb 04 13:29:38 2016 +0800
@@ -88,15 +88,19 @@
{ }
void processTasks(int workerThreadId);
+ void processTasksRow(int workerThreadId);
// Apply SAO on a CU in current row
+ void processSaoPcmRow(int startCol);
void processSaoUnitCu(SAOParam *saoParam, int col);
// Copy and Save SAO reference pixels for SAO Rdo decide
void copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col);
+ void copySaoAboveRefRow(PicYuv* reconPic, uint32_t cuAddr, int col);
// Post-Process (Border extension)
void processPostCu(int col) const;
+ void processPostRow() const;
uint32_t getCUHeight() const
{
diff -r dc62b47dd0d9 -r ad8ebeffdda4 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Mon Jan 25 14:59:50 2016 +0530
+++ b/source/encoder/sao.cpp Thu Feb 04 13:29:38 2016 +0800
@@ -595,6 +595,79 @@
}
}
+/* Process the SAO units of CTU row idxY from column startX to the end of the row, for one plane (0 selects the luma stride, non-zero the chroma stride) */
+void SAO::processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int startX, int plane)
+{
+ PicYuv* reconPic = m_frame->m_reconPic;
+ intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
+ uint32_t picWidth = m_param->sourceWidth; // NOTE(review): picWidth is adjusted below but never read again in this function
+ int ctuWidth = g_maxCUSize;
+ int ctuHeight = g_maxCUSize;
+
+ if (plane) // scale the dimensions down to chroma resolution
+ {
+ picWidth >>= m_hChromaShift;
+ ctuWidth >>= m_hChromaShift;
+ ctuHeight >>= m_vChromaShift;
+ }
+
+ int addr = idxY * m_numCuInWidth; // address of the first CTU in this row
+ pixel* rec = reconPic->getPlaneAddr(plane, addr);
+
+ if (startX == 0) // row start: seed m_tmpL1 with the first pixel column of the row; NOTE(review): for startX != 0 m_tmpL1 is not refreshed here - presumably left by an earlier call, confirm
+ {
+ for (int i = 0; i < ctuHeight + 1; i++)
+ {
+ m_tmpL1[plane][i] = rec[0];
+ rec += stride;
+ }
+ }
+
+ for (int idxX = startX; idxX < m_numCuInWidth; idxX++)
+ {
+ addr = idxY * m_numCuInWidth + idxX;
+
+ bool mergeLeftFlag = ctuParam[addr].mergeMode == SAO_MERGE_LEFT;
+ int typeIdx = ctuParam[addr].typeIdx;
+
+ if (idxX != (m_numCuInWidth - 1)) // not the last CTU: save this CTU's right-most pixel column into m_tmpL2 before processSaoCu runs
+ {
+ rec = reconPic->getPlaneAddr(plane, addr);
+ for (int i = 0; i < ctuHeight + 1; i++)
+ {
+ m_tmpL2[plane][i] = rec[ctuWidth - 1];
+ rec += stride;
+ }
+ }
+
+ if (typeIdx >= 0) // a negative typeIdx skips this CTU entirely
+ {
+ if (!mergeLeftFlag) // with merge-left the offset tables are left as the previous CTU set them
+ {
+ if (typeIdx == SAO_BO)
+ {
+ memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0]));
+
+ for (int i = 0; i < SAO_NUM_OFFSET; i++) // fill the bands starting at bandPos, wrapping modulo SAO_NUM_BO_CLASSES
+ m_offsetBo[plane][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
+ }
+ else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
+ {
+ int offset[NUM_EDGETYPE];
+ offset[0] = 0;
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
+ offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC;
+
+ for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++) // reorder the offsets through s_eoTable
+ m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+ }
+ }
+ processSaoCu(addr, typeIdx, plane);
+ }
+ std::swap(m_tmpL1[plane], m_tmpL2[plane]); // the column saved for this CTU becomes m_tmpL1 for the next CTU
+ }
+}
+
/* Process SAO unit */
void SAO::processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX)
{
@@ -1361,6 +1434,157 @@
}
}
+void SAO::rdoSaoUnitRow(SAOParam* saoParam, int rowBaseAddr, int startCol) // Chooses SAO parameters (new offsets vs. merge-left / merge-up) by cost for CTUs [startCol, m_numCuInWidth) of the row whose first CTU address is rowBaseAddr
+{
+ double lambda[3] = {m_lumaLambda, m_chromaLambda, m_chromaLambda};
+
+ bool chroma = m_param->internalCsp != X265_CSP_I400;
+ int planes = chroma ? 3 : 1;
+ bool allowMerge[2] = {(startCol != 0), (rowBaseAddr != 0)}; // left, up: no left candidate at column 0, no upper candidate when rowBaseAddr is 0
+// int addrMerge[2] = {(startCol - 1), (rowBaseAddr ? startCol - m_numCuInWidth : -1)};// left, up
+
+ for(int idxX = startCol; idxX < m_numCuInWidth; idxX++)
+ {
+// X265_CHECK((idxX ? idxX - 1 : -1) == addrMerge[0], "addrMerge[0] check failure");
+ const int addr = rowBaseAddr + idxX;
+
+ int addrMerge[2] = {(idxX ? addr - 1 : -1), (rowBaseAddr ? addr - m_numCuInWidth : -1)};// left, up (-1 when the candidate does not exist)
+
+ m_entropyCoder.load(m_rdContexts.cur); // code "no merge" (0) for each available candidate and keep that state in temp
+ if (allowMerge[0])
+ m_entropyCoder.codeSaoMerge(0);
+ if (allowMerge[1])
+ m_entropyCoder.codeSaoMerge(0);
+ m_entropyCoder.store(m_rdContexts.temp);
+
+ // reset stats Y, Cb, Cr
+ X265_CHECK(sizeof(PerPlane) == (sizeof(int32_t) * (NUM_PLANE * MAX_NUM_SAO_TYPE * MAX_NUM_SAO_CLASS)), "Found Padding space in struct PerPlane");
+
+ // TODO: Confirm the address space is continuous
+ if (m_param->bSaoNonDeblocked) // start from the pre-deblock statistics of this CTU instead of zero
+ {
+ memcpy(m_count, m_countPreDblk[addr], sizeof(m_count));
+ memcpy(m_offsetOrg, m_offsetOrgPreDblk[addr], sizeof(m_offsetOrg));
+ }
+ else
+ {
+ memset(m_count, 0, sizeof(m_count));
+ memset(m_offsetOrg, 0, sizeof(m_offsetOrg));
+ }
+
+ for (int i = 0; i < planes; i++)
+ saoParam->ctuParam[i][addr].reset();
+
+ if (saoParam->bSaoFlag[0]) // luma statistics
+ {
+ calcSaoStatsCu(addr, 0);
+ saoStatsInitialOffset(0);
+ }
+
+ if (saoParam->bSaoFlag[1]) // chroma statistics (Cb and Cr share bSaoFlag[1])
+ {
+ calcSaoStatsCu(addr, 1);
+ calcSaoStatsCu(addr, 2);
+ saoStatsInitialOffset(1);
+ // saoStatsInitialOffset(2);
+ }
+
+ double mergeDist[NUM_MERGE_MODE] = { 0.0 }; // [0] = new parameters, [1] = merge left, [2] = merge up
+ saoLumaComponentParamDist(saoParam, addr, mergeDist);
+ if (chroma)
+ saoChromaComponentParamDist(saoParam, addr, mergeDist);
+
+ if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
+ {
+ // Cost of new SAO_params
+ m_entropyCoder.load(m_rdContexts.cur);
+ m_entropyCoder.resetBits();
+ if (allowMerge[0])
+ m_entropyCoder.codeSaoMerge(0);
+ if (allowMerge[1])
+ m_entropyCoder.codeSaoMerge(0);
+ for (int plane = 0; plane < planes; plane++)
+ {
+ if (saoParam->bSaoFlag[plane > 0])
+ m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
+ }
+
+ uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
+ double bestCost = mergeDist[0] + (double)rate;
+ m_entropyCoder.store(m_rdContexts.temp);
+
+ // Cost of merge left or Up
+ for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
+ {
+ if (!allowMerge[mergeIdx])
+ continue;
+
+ for (int plane = 0; plane < planes; plane++) // only the planes of this colour space, matching every other plane loop in this function
+ {
+ int64_t estDist = 0;
+ SaoCtuParam* mergeSrcParam = &(saoParam->ctuParam[plane][addrMerge[mergeIdx]]);
+ int typeIdx = mergeSrcParam->typeIdx;
+ if (typeIdx >= 0)
+ {
+ int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0;
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
+ {
+ int mergeOffset = mergeSrcParam->offset[classIdx];
+ estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
+ }
+ }
+
+ mergeDist[mergeIdx + 1] += ((double)estDist / lambda[plane]);
+ }
+
+
+ m_entropyCoder.load(m_rdContexts.cur);
+ m_entropyCoder.resetBits();
+ if (allowMerge[0])
+ m_entropyCoder.codeSaoMerge(1 - mergeIdx); // merge-left flag: 1 when testing left, 0 when testing up
+ if (allowMerge[1] && (mergeIdx == 1))
+ m_entropyCoder.codeSaoMerge(1); // merge-up flag, coded only when testing up
+
+ rate = m_entropyCoder.getNumberOfWrittenBits();
+ double mergeCost = mergeDist[mergeIdx + 1] + (double)rate;
+ if (mergeCost < bestCost) // merging is cheaper: copy the candidate's parameters into this CTU
+ {
+ SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
+ bestCost = mergeCost;
+ m_entropyCoder.store(m_rdContexts.temp);
+ for (int plane = 0; plane < planes; plane++)
+ {
+ if (saoParam->bSaoFlag[plane > 0])
+ {
+ SaoCtuParam* dstCtuParam = &saoParam->ctuParam[plane][addr];
+ SaoCtuParam* mergeSrcParam = &(saoParam->ctuParam[plane][addrMerge[mergeIdx]]);
+ dstCtuParam->mergeMode = mergeMode;
+ dstCtuParam->typeIdx = mergeSrcParam->typeIdx;
+ dstCtuParam->bandPos = mergeSrcParam->bandPos;
+
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
+ dstCtuParam->offset[i] = mergeSrcParam->offset[i];
+ }
+ }
+ }
+ }
+
+ if (saoParam->ctuParam[0][addr].typeIdx < 0) // count CTUs left without SAO, luma and chroma separately
+ m_numNoSao[0]++;
+ if (chroma && saoParam->ctuParam[1][addr].typeIdx < 0)
+ m_numNoSao[1]++;
+ m_entropyCoder.load(m_rdContexts.temp); // make the winning state the current context for the next CTU
+ m_entropyCoder.store(m_rdContexts.cur);
+ }
+
+ // Left merge still available after first CU
+ allowMerge[0] = true;
+
+ // next CU address
+ //addrMerge[0]++;
+ //addrMerge[1] += (rowBaseAddr ? 1 : 0);
+ }
+}
// Rounds the division of initial offsets by the number of samples in
// each of the statistics table entries.
diff -r dc62b47dd0d9 -r ad8ebeffdda4 source/encoder/sao.h
--- a/source/encoder/sao.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/encoder/sao.h Thu Feb 04 13:29:38 2016 +0800
@@ -132,7 +132,7 @@
// CTU-based SAO process without slice granularity
void processSaoCu(int addr, int typeIdx, int plane);
- void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane);
+ void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int startX, int plane);
void processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX);
void processSaoUnitCuChroma(SaoCtuParam* ctuParam[3], int idxY, int idxX);
@@ -147,7 +147,7 @@
inline int estIterOffset(int typeIdx, double lambda, int offset, int32_t count, int32_t offsetOrg,
int& currentDistortionTableBo, double& currentRdCostTableBo);
void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
-// void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
+ void rdoSaoUnitRow(SAOParam* saoParam, int rowBaseAddr, int startCol);
void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int addr);
void saoStatsInitialOffset(int plane);
More information about the x265-devel
mailing list