[x265] [PATCH 1 of 2] improve performance by full row process
chen
chenm003 at 163.com
Fri Feb 5 14:26:54 CET 2016
It affects performance on the ultrafast preset and on systems with fewer cores; the row-process function reduces overhead cost.
At 2016-02-05 21:24:30,"Ashok Kumar Mishra" <ashok at multicorewareinc.com> wrote:
I think there is no point in having two functions for the same purpose:
processTasks() and processTasksRow(). We should use either processTasks() or processTasksRow(), not both.
I don't know how much performance we gain by doing this, but from my perspective we should
also look into the complexity of the code at the same time.
On Thu, Feb 4, 2016 at 10:59 AM, Min Chen <chenm003 at 163.com> wrote:
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1454563778 -28800
# Node ID ad8ebeffdda44378dd93b787215a937a26be980e
# Parent dc62b47dd0d98f732165345883edac55320baec1
improve performance by full row process
---
source/encoder/framefilter.cpp | 197 +++++++++++++++++++++++++++++++++--
source/encoder/framefilter.h | 4 +
source/encoder/sao.cpp | 224 ++++++++++++++++++++++++++++++++++++++++
source/encoder/sao.h | 4 +-
4 files changed, 418 insertions(+), 11 deletions(-)
diff -r dc62b47dd0d9 -r ad8ebeffdda4 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Mon Jan 25 14:59:50 2016 +0530
+++ b/source/encoder/framefilter.cpp Thu Feb 04 13:29:38 2016 +0800
@@ -174,6 +174,22 @@
restoreOrigLosslessYuv(cu, frame, absPartIdx);
}
+void FrameFilter::ParallelFilter::processSaoPcmRow(int startCol)
+{
+ if (m_encData->m_slice->m_pps->bTransquantBypassEnabled)
+ {
+ const CUGeom* cuGeoms = m_frameFilter->m_frameEncoder->m_cuGeoms;
+ const uint32_t* ctuGeomMap = m_frameFilter->m_frameEncoder->m_ctuGeomMap;
+
+ for (int col = startCol; col < m_frameFilter->m_numCols; col++)
+ {
+ uint32_t cuAddr = m_rowAddr + col;
+ const CUData* ctu = m_encData->getPicCTU(cuAddr);
+ origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frameFilter->m_frame);
+ }
+ }
+}
+
void FrameFilter::ParallelFilter::copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col)
{
// Copy SAO Top Reference Pixels
@@ -182,7 +198,7 @@
// Luma
memcpy(&m_sao.m_tmpU[0][col * ctuWidth], recY, ctuWidth * sizeof(pixel));
- X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer beyond bound write detected");
+ X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer write beyond bound detected");
// Chroma
if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
@@ -194,7 +210,32 @@
memcpy(&m_sao.m_tmpU[1][col * ctuWidth], recU, ctuWidth * sizeof(pixel));
memcpy(&m_sao.m_tmpU[2][col * ctuWidth], recV, ctuWidth * sizeof(pixel));
- X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer beyond bound write detected");
+ X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer write beyond bound detected");
+ }
+}
+
+void FrameFilter::ParallelFilter::copySaoAboveRefRow(PicYuv* reconPic, uint32_t cuAddr, int col)
+{
+ // Copy SAO Top Reference Pixels
+ int ctuWidth = g_maxCUSize;
+ const pixel* recY = reconPic->getPlaneAddr(0, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_stride);
+ const int cntCols = (m_frameFilter->m_numCols - col);
+
+ // Luma
+ memcpy(&m_sao.m_tmpU[0][col * ctuWidth], recY, cntCols * ctuWidth * sizeof(pixel));
+ X265_CHECK(col * ctuWidth + cntCols * ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer write beyond bound detected");
+
+ // Chroma
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
+ {
+ ctuWidth >>= m_sao.m_hChromaShift;
+
+ const pixel* recU = reconPic->getPlaneAddr(1, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_strideC);
+ const pixel* recV = reconPic->getPlaneAddr(2, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_strideC);
+ memcpy(&m_sao.m_tmpU[1][col * ctuWidth], recU, cntCols * ctuWidth * sizeof(pixel));
+ memcpy(&m_sao.m_tmpU[2][col * ctuWidth], recV, cntCols * ctuWidth * sizeof(pixel));
+
+ X265_CHECK(col * ctuWidth + cntCols * ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer beyond bound detected");
}
}
@@ -243,7 +284,7 @@
const intptr_t stride = reconPic->m_stride;
const intptr_t strideC = reconPic->m_strideC;
pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr);
- // // MUST BE check I400 since m_picOrg uninitialize in that case
+ // MUST BE check I400 since m_picOrg uninitialize in that case
pixel *pixU = (m_frameFilter->m_param->internalCsp != X265_CSP_I400) ? reconPic->getCbAddr(lineStartCUAddr) : NULL;
pixel *pixV = (m_frameFilter->m_param->internalCsp != X265_CSP_I400) ? reconPic->getCrAddr(lineStartCUAddr) : NULL;
int copySizeY = realW;
@@ -312,6 +353,79 @@
}
}
+void FrameFilter::ParallelFilter::processPostRow() const
+{
+
+ PicYuv *reconPic = m_frameFilter->m_frame->m_reconPic;
+
+ const uint32_t lumaMarginX = reconPic->m_lumaMarginX;
+ const uint32_t lumaMarginY = reconPic->m_lumaMarginY;
+ const uint32_t chromaMarginX = reconPic->m_chromaMarginX;
+ const uint32_t chromaMarginY = reconPic->m_chromaMarginY;
+ const int hChromaShift = reconPic->m_hChromaShift;
+ const int vChromaShift = reconPic->m_vChromaShift;
+ const intptr_t stride = reconPic->m_stride;
+ const intptr_t strideC = reconPic->m_strideC;
+ pixel *pixY0 = reconPic->getLumaAddr(m_rowAddr);
+ // MUST BE check I400 since m_picOrg uninitialize in that case
+ pixel *pixU0 = (m_frameFilter->m_param->internalCsp != X265_CSP_I400) ? reconPic->getCbAddr(m_rowAddr) : NULL;
+ pixel *pixV0 = (m_frameFilter->m_param->internalCsp != X265_CSP_I400) ? reconPic->getCrAddr(m_rowAddr) : NULL;
+ const int realH = getCUHeight();
+
+ // Border extend Left and Right
+ primitives.extendRowBorder(pixY0, reconPic->m_stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
+ {
+ primitives.extendRowBorder(pixU0, strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, chromaMarginX);
+ primitives.extendRowBorder(pixV0, strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, chromaMarginX);
+ }
+
+ // Border extend Top
+ if (!m_row)
+ {
+ pixel *pixY = pixY0 - lumaMarginX;
+
+ for (uint32_t y = 0; y < lumaMarginY; y++)
+ memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
+
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
+ {
+ pixel *pixU = pixU0 - chromaMarginX;
+ pixel *pixV = pixV0 - chromaMarginX;
+
+ for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
+ {
+ memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(pixel));
+ memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(pixel));
+ }
+ }
+ }
+
+ // Border extend Bottom
+ if (m_row == m_frameFilter->m_numRows - 1)
+ {
+ pixel *pixY = pixY0 - lumaMarginX + (realH - 1) * stride;
+
+ for (uint32_t y = 0; y < lumaMarginY; y++)
+ memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
+
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
+ {
+ pixel *pixU = pixU0 - chromaMarginX + ((realH >> vChromaShift) - 1) * strideC;
+ pixel *pixV = pixV0 - chromaMarginX + ((realH >> vChromaShift) - 1) * strideC;
+
+ for (uint32_t y = 0; y < chromaMarginY; y++)
+ {
+ memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(pixel));
+ memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(pixel));
+ }
+ }
+ }
+
+ // Update finished CU cursor
+ m_frameFilter->m_frame->m_reconColCount[m_row].set(m_frameFilter->m_numCols - 1);
+}
+
// NOTE: Single Threading only
void FrameFilter::ParallelFilter::processTasks(int /*workerThreadId*/)
{
@@ -433,6 +547,75 @@
}
}
+void FrameFilter::ParallelFilter::processTasksRow(int /*workerThreadId*/)
+{
+ SAOParam* saoParam = m_encData->m_saoParam;
+ const CUGeom* cuGeoms = m_frameFilter->m_frameEncoder->m_cuGeoms;
+ const uint32_t* ctuGeomMap = m_frameFilter->m_frameEncoder->m_ctuGeomMap;
+ PicYuv* reconPic = m_encData->m_reconPic;
+ const int colStart = m_lastCol.get();
+ const int numCols = m_frameFilter->m_numCols;
+
+ // Avoid threading conflict
+ if (colStart >= numCols)
+ return;
+
+ // Previous row MUST BE finish
+ if (m_frameFilter->m_param->bEnableLoopFilter)
+ {
+ for (uint32_t col = (uint32_t)colStart; col < (uint32_t)numCols; col++)
+ {
+ const uint32_t cuAddr = m_rowAddr + col;
+
+ const CUData* ctu = m_encData->getPicCTU(cuAddr);
+ deblockCTU(ctu, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_VER);
+
+ if (col >= 1)
+ {
+ const CUData* ctuPrev = m_encData->getPicCTU(cuAddr - 1);
+ deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);
+ }
+ }
+ // Process last column
+ {
+ const uint32_t cuAddr = m_rowAddr + numCols - 1;
+ const CUData* ctuPrev = m_encData->getPicCTU(cuAddr);
+ deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_HOR);
+ }
+ }
+
+ if (m_frameFilter->m_param->bEnableSAO)
+ {
+ // Save SAO bottom row reference pixels
+ copySaoAboveRefRow(reconPic, m_rowAddr + X265_MAX(0, colStart - 1), X265_MAX(0, colStart - 1));
+
+ m_sao.rdoSaoUnitRow(saoParam, m_rowAddr, X265_MAX(0, colStart - 2));
+
+ // Process Previous Row SAO CU
+ if (m_row >= 1)
+ {
+ const int saoProcessStartCol = X265_MAX(0, colStart - 3);
+
+ // Must delay 1 row to avoid thread data race conflict
+ m_prevRow->m_sao.processSaoUnitRow(saoParam->ctuParam[0], m_prevRow->m_row, saoProcessStartCol, 0);
+ m_prevRow->m_sao.processSaoUnitRow(saoParam->ctuParam[1], m_prevRow->m_row, saoProcessStartCol, 1);
+ m_prevRow->m_sao.processSaoUnitRow(saoParam->ctuParam[2], m_prevRow->m_row, saoProcessStartCol, 2);
+ m_prevRow->processSaoPcmRow(saoProcessStartCol);
+ }
+ }
+
+ if (m_row >= 1)
+ {
+ // TODO: process current row when SAO disabled
+ m_prevRow->processPostRow();
+ }
+
+ // Setting column sync counter
+ if (m_row >= 1)
+ m_frameFilter->m_frame->m_reconColCount[m_row - 1].set(numCols - 1); // REMOVE soon
+ m_lastDeblocked.set(numCols);
+}
+
void FrameFilter::processRow(int row)
{
ProfileScopeEvent(filterCTURow);
@@ -461,7 +644,7 @@
X265_CHECK((row < 1) || m_parallelFilter[row - 1].m_lastDeblocked.get() == m_numCols, "previous row not finish");
m_parallelFilter[row].m_allowedCol.set(m_numCols);
- m_parallelFilter[row].processTasks(-1);
+ m_parallelFilter[row].processTasksRow(-1);
if (row == m_numRows - 1)
{
@@ -480,11 +663,7 @@
}
// Process border extension on last row
- for(int col = 0; col < m_numCols; col++)
- {
- // m_reconColCount will be set in processPostCu()
- m_parallelFilter[row].processPostCu(col);
- }
+ m_parallelFilter[row].processPostRow();
}
}
diff -r dc62b47dd0d9 -r ad8ebeffdda4 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/encoder/framefilter.h Thu Feb 04 13:29:38 2016 +0800
@@ -88,15 +88,19 @@
{ }
void processTasks(int workerThreadId);
+ void processTasksRow(int workerThreadId);
// Apply SAO on a CU in current row
+ void processSaoPcmRow(int startCol);
void processSaoUnitCu(SAOParam *saoParam, int col);
// Copy and Save SAO reference pixels for SAO Rdo decide
void copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col);
+ void copySaoAboveRefRow(PicYuv* reconPic, uint32_t cuAddr, int col);
// Post-Process (Border extension)
void processPostCu(int col) const;
+ void processPostRow() const;
uint32_t getCUHeight() const
{
diff -r dc62b47dd0d9 -r ad8ebeffdda4 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Mon Jan 25 14:59:50 2016 +0530
+++ b/source/encoder/sao.cpp Thu Feb 04 13:29:38 2016 +0800
@@ -595,6 +595,79 @@
}
}
+/* Process SAO all units */
+void SAO::processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int startX, int plane)
+{
+ PicYuv* reconPic = m_frame->m_reconPic;
+ intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
+ uint32_t picWidth = m_param->sourceWidth;
+ int ctuWidth = g_maxCUSize;
+ int ctuHeight = g_maxCUSize;
+
+ if (plane)
+ {
+ picWidth >>= m_hChromaShift;
+ ctuWidth >>= m_hChromaShift;
+ ctuHeight >>= m_vChromaShift;
+ }
+
+ int addr = idxY * m_numCuInWidth;
+ pixel* rec = reconPic->getPlaneAddr(plane, addr);
+
+ if (startX == 0)
+ {
+ for (int i = 0; i < ctuHeight + 1; i++)
+ {
+ m_tmpL1[plane][i] = rec[0];
+ rec += stride;
+ }
+ }
+
+ for (int idxX = startX; idxX < m_numCuInWidth; idxX++)
+ {
+ addr = idxY * m_numCuInWidth + idxX;
+
+ bool mergeLeftFlag = ctuParam[addr].mergeMode == SAO_MERGE_LEFT;
+ int typeIdx = ctuParam[addr].typeIdx;
+
+ if (idxX != (m_numCuInWidth - 1))
+ {
+ rec = reconPic->getPlaneAddr(plane, addr);
+ for (int i = 0; i < ctuHeight + 1; i++)
+ {
+ m_tmpL2[plane][i] = rec[ctuWidth - 1];
+ rec += stride;
+ }
+ }
+
+ if (typeIdx >= 0)
+ {
+ if (!mergeLeftFlag)
+ {
+ if (typeIdx == SAO_BO)
+ {
+ memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0]));
+
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
+ m_offsetBo[plane][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
+ }
+ else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
+ {
+ int offset[NUM_EDGETYPE];
+ offset[0] = 0;
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
+ offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC;
+
+ for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
+ m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+ }
+ }
+ processSaoCu(addr, typeIdx, plane);
+ }
+ std::swap(m_tmpL1[plane], m_tmpL2[plane]);
+ }
+}
+
/* Process SAO unit */
void SAO::processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX)
{
@@ -1361,6 +1434,157 @@
}
}
+void SAO::rdoSaoUnitRow(SAOParam* saoParam, int rowBaseAddr, int startCol)
+{
+ double lambda[3] = {m_lumaLambda, m_chromaLambda, m_chromaLambda};
+
+ bool chroma = m_param->internalCsp != X265_CSP_I400;
+ int planes = chroma ? 3 : 1;
+ bool allowMerge[2] = {(startCol != 0), (rowBaseAddr != 0)}; // left, up
+// int addrMerge[2] = {(startCol - 1), (rowBaseAddr ? startCol - m_numCuInWidth : -1)};// left, up
+
+ for(int idxX = startCol; idxX < m_numCuInWidth; idxX++)
+ {
+// X265_CHECK((idxX ? idxX - 1 : -1) == addrMerge[0], "addrMerge[0] check failure");
+ const int addr = rowBaseAddr + idxX;
+
+ int addrMerge[2] = {(idxX ? addr - 1 : -1), (rowBaseAddr ? addr - m_numCuInWidth : -1)};// left, up
+
+ m_entropyCoder.load(m_rdContexts.cur);
+ if (allowMerge[0])
+ m_entropyCoder.codeSaoMerge(0);
+ if (allowMerge[1])
+ m_entropyCoder.codeSaoMerge(0);
+ m_entropyCoder.store(m_rdContexts.temp);
+
+ // reset stats Y, Cb, Cr
+ X265_CHECK(sizeof(PerPlane) == (sizeof(int32_t) * (NUM_PLANE * MAX_NUM_SAO_TYPE * MAX_NUM_SAO_CLASS)), "Found Padding space in struct PerPlane");
+
+ // TODO: Confirm the address space is continuous
+ if (m_param->bSaoNonDeblocked)
+ {
+ memcpy(m_count, m_countPreDblk[addr], sizeof(m_count));
+ memcpy(m_offsetOrg, m_offsetOrgPreDblk[addr], sizeof(m_offsetOrg));
+ }
+ else
+ {
+ memset(m_count, 0, sizeof(m_count));
+ memset(m_offsetOrg, 0, sizeof(m_offsetOrg));
+ }
+
+ for (int i = 0; i < planes; i++)
+ saoParam->ctuParam[i][addr].reset();
+
+ if (saoParam->bSaoFlag[0])
+ {
+ calcSaoStatsCu(addr, 0);
+ saoStatsInitialOffset(0);
+ }
+
+ if (saoParam->bSaoFlag[1])
+ {
+ calcSaoStatsCu(addr, 1);
+ calcSaoStatsCu(addr, 2);
+ saoStatsInitialOffset(1);
+ // saoStatsInitialOffset(2);
+ }
+
+ double mergeDist[NUM_MERGE_MODE] = { 0.0 };
+ saoLumaComponentParamDist(saoParam, addr, mergeDist);
+ if (chroma)
+ saoChromaComponentParamDist(saoParam, addr, mergeDist);
+
+ if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
+ {
+ // Cost of new SAO_params
+ m_entropyCoder.load(m_rdContexts.cur);
+ m_entropyCoder.resetBits();
+ if (allowMerge[0])
+ m_entropyCoder.codeSaoMerge(0);
+ if (allowMerge[1])
+ m_entropyCoder.codeSaoMerge(0);
+ for (int plane = 0; plane < planes; plane++)
+ {
+ if (saoParam->bSaoFlag[plane > 0])
+ m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
+ }
+
+ uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
+ double bestCost = mergeDist[0] + (double)rate;
+ m_entropyCoder.store(m_rdContexts.temp);
+
+ // Cost of merge left or Up
+ for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
+ {
+ if (!allowMerge[mergeIdx])
+ continue;
+
+ for (int plane = 0; plane < 3; plane++)
+ {
+ int64_t estDist = 0;
+ SaoCtuParam* mergeSrcParam = &(saoParam->ctuParam[plane][addrMerge[mergeIdx]]);
+ int typeIdx = mergeSrcParam->typeIdx;
+ if (typeIdx >= 0)
+ {
+ int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0;
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
+ {
+ int mergeOffset = mergeSrcParam->offset[classIdx];
+ estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
+ }
+ }
+
+ mergeDist[mergeIdx + 1] += ((double)estDist / lambda[plane]);
+ }
+
+
+ m_entropyCoder.load(m_rdContexts.cur);
+ m_entropyCoder.resetBits();
+ if (allowMerge[0])
+ m_entropyCoder.codeSaoMerge(1 - mergeIdx);
+ if (allowMerge[1] && (mergeIdx == 1))
+ m_entropyCoder.codeSaoMerge(1);
+
+ rate = m_entropyCoder.getNumberOfWrittenBits();
+ double mergeCost = mergeDist[mergeIdx + 1] + (double)rate;
+ if (mergeCost < bestCost)
+ {
+ SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
+ bestCost = mergeCost;
+ m_entropyCoder.store(m_rdContexts.temp);
+ for (int plane = 0; plane < planes; plane++)
+ {
+ if (saoParam->bSaoFlag[plane > 0])
+ {
+ SaoCtuParam* dstCtuParam = &saoParam->ctuParam[plane][addr];
+ SaoCtuParam* mergeSrcParam = &(saoParam->ctuParam[plane][addrMerge[mergeIdx]]);
+ dstCtuParam->mergeMode = mergeMode;
+ dstCtuParam->typeIdx = mergeSrcParam->typeIdx;
+ dstCtuParam->bandPos = mergeSrcParam->bandPos;
+
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
+ dstCtuParam->offset[i] = mergeSrcParam->offset[i];
+ }
+ }
+ }
+ }
+
+ if (saoParam->ctuParam[0][addr].typeIdx < 0)
+ m_numNoSao[0]++;
+ if (chroma && saoParam->ctuParam[1][addr].typeIdx < 0)
+ m_numNoSao[1]++;
+ m_entropyCoder.load(m_rdContexts.temp);
+ m_entropyCoder.store(m_rdContexts.cur);
+ }
+
+ // Left merge still available after first CU
+ allowMerge[0] = true;
+
+ // next CU address
+ //addrMerge[0]++;
+ //addrMerge[1] += (rowBaseAddr ? 1 : 0);
+ }
+}
// Rounds the division of initial offsets by the number of samples in
// each of the statistics table entries.
diff -r dc62b47dd0d9 -r ad8ebeffdda4 source/encoder/sao.h
--- a/source/encoder/sao.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/encoder/sao.h Thu Feb 04 13:29:38 2016 +0800
@@ -132,7 +132,7 @@
// CTU-based SAO process without slice granularity
void processSaoCu(int addr, int typeIdx, int plane);
- void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane);
+ void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int startX, int plane);
void processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX);
void processSaoUnitCuChroma(SaoCtuParam* ctuParam[3], int idxY, int idxX);
@@ -147,7 +147,7 @@
inline int estIterOffset(int typeIdx, double lambda, int offset, int32_t count, int32_t offsetOrg,
int& currentDistortionTableBo, double& currentRdCostTableBo);
void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
-// void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
+ void rdoSaoUnitRow(SAOParam* saoParam, int rowBaseAddr, int startCol);
void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int addr);
void saoStatsInitialOffset(int plane);
_______________________________________________
x265-devel mailing list
x265-devel at videolan.org
https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160205/ed3a34d0/attachment-0001.html>
More information about the x265-devel
mailing list