[x265] [PATCH] framepp: Active frame parallelism
Min Chen
chenm003 at 163.com
Tue Sep 3 09:47:09 CEST 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1378194421 -28800
# Node ID bc1887f2bbc47044eecc607f35aae9a1954abbeb
# Parent 2f9fcf7689181107c11ec400047edef1e714cb67
framepp: Active frame parallelism
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp Mon Sep 02 16:16:04 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp Tue Sep 03 15:47:01 2013 +0800
@@ -2715,14 +2715,17 @@
return false;
}
-Void TComDataCU::clipMv(MV& outMV)
+Void TComDataCU::clipMv(MV& outMV, int rowsAvailable)
{
Int mvshift = 2;
Int offset = 8;
Int xmax = (m_slice->getSPS()->getPicWidthInLumaSamples() + offset - m_cuPelX - 1) << mvshift;
Int xmin = (-(Int)g_maxCUWidth - offset - (Int)m_cuPelX + 1) << mvshift;
- Int ymax = (m_slice->getSPS()->getPicHeightInLumaSamples() + offset - m_cuPelY - 1) << mvshift;
+ int ylimit = m_slice->getSPS()->getPicHeightInLumaSamples();
+ if (rowsAvailable)
+ ylimit = X265_MIN(rowsAvailable * g_maxCUHeight, ylimit);
+ Int ymax = (ylimit + offset - m_cuPelY - 1) << mvshift;
Int ymin = (-(Int)g_maxCUHeight - offset - (Int)m_cuPelY + 1) << mvshift;
outMV.x = min(xmax, max(xmin, (Int)outMV.x));
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h Mon Sep 02 16:16:04 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.h Tue Sep 03 15:47:01 2013 +0800
@@ -448,7 +448,7 @@
Void setMVPIdxSubParts(Int mvpIdx, RefPicList picList, UInt absPartIdx, UInt partIdx, UInt depth);
Void setMVPNumSubParts(Int iMVPNum, RefPicList picList, UInt absPartIdx, UInt partIdx, UInt depth);
- Void clipMv(x265::MV& outMV);
+ Void clipMv(x265::MV& outMV, int rowsAvailable = 0);
Void getMvPredLeft(x265::MV& mvPred) { mvPred = m_mvFieldA.mv; }
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp Mon Sep 02 16:16:04 2013 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp Tue Sep 03 15:47:01 2013 +0800
@@ -60,8 +60,6 @@
m_picOrgV = NULL;
m_refList = NULL;
-
- m_bIsBorderExtended = false;
}
TComPicYuv::~TComPicYuv()
@@ -98,8 +96,6 @@
m_picOrgU = m_picBufU + m_chromaMarginY * getCStride() + m_chromaMarginX;
m_picOrgV = m_picBufV + m_chromaMarginY * getCStride() + m_chromaMarginX;
- m_bIsBorderExtended = false;
-
m_cuOffsetY = new Int[numCuInWidth * numCuInHeight];
m_cuOffsetC = new Int[numCuInWidth * numCuInHeight];
for (Int cuRow = 0; cuRow < numCuInHeight; cuRow++)
@@ -240,14 +236,8 @@
x265::MotionReference* TComPicYuv::generateMotionReference(wpScalingParam *w)
{
- if (!m_bIsBorderExtended)
- {
- /* HPEL generation requires luma integer plane to already be extended */
- xExtendPicCompBorder(getLumaAddr(), getStride(), getWidth(), getHeight(), m_lumaMarginX, m_lumaMarginY);
- xExtendPicCompBorder(getCbAddr(), getCStride(), getWidth() >> 1, getHeight() >> 1, m_chromaMarginX, m_chromaMarginY);
- xExtendPicCompBorder(getCrAddr(), getCStride(), getWidth() >> 1, getHeight() >> 1, m_chromaMarginX, m_chromaMarginY);
- m_bIsBorderExtended = true;
- }
+ /* HPEL generation requires luma integer plane to already be extended */
+ // NOTE: borders are now extended after every CU row, so the whole-picture extension was removed from here
MotionReference *mref;
for (mref = m_refList; mref != NULL; mref = mref->m_next)
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h Mon Sep 02 16:16:04 2013 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.h Tue Sep 03 15:47:01 2013 +0800
@@ -94,8 +94,6 @@
Int m_stride;
Int m_strideC;
- Bool m_bIsBorderExtended;
-
public:
Int m_numCuInWidth;
Int m_numCuInHeight;
@@ -132,7 +130,9 @@
Int getLumaMarginY() { return m_lumaMarginY; }
- Int getChromaMargin() { return m_chromaMarginX; }
+ Int getChromaMarginX() { return m_chromaMarginX; }
+
+ Int getChromaMarginY() { return m_chromaMarginY; }
// ------------------------------------------------------------------------------------------------
// Access function for picture buffer
@@ -181,9 +181,6 @@
// Dump picture
Void dump(Char* pFileName, Bool bAdd = false);
- // Set border extension flag
- Void clearExtendedFlag() { m_bIsBorderExtended = false; }
-
friend class x265::MotionReference;
}; // END CLASS DEFINITION TComPicYuv
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp Mon Sep 02 16:16:04 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.cpp Tue Sep 03 15:47:01 2013 +0800
@@ -452,7 +452,6 @@
if (cu->getSlice()->getSliceType() != I_SLICE)
fprintf(fp1, "\n CU number : %d ", totalCU);
#endif
- //printf("compressCU[%2d]: Best=0x%08X, Temp=0x%08X\n", omp_get_thread_num(), m_ppcBestCU[0], m_ppcTempCU[0]);
m_addSADDepth = 0;
m_LCUPredictionSAD = 0;
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Sep 02 16:16:04 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Sep 03 15:47:01 2013 +0800
@@ -2946,6 +2946,7 @@
UInt TEncSearch::xGetTemplateCost(TComDataCU* cu, UInt partAddr, TComYuv* templateCand, MV mvCand, Int mvpIdx,
Int mvpCandCount, RefPicList picList, Int refIdx, Int sizex, Int sizey)
{
+ // TODO: does it clip with m_referenceRowsAvailable?
cu->clipMv(mvCand);
// prediction pattern
@@ -3013,8 +3014,8 @@
mvmin = mvp - dist;
mvmax = mvp + dist;
- cu->clipMv(mvmin);
- cu->clipMv(mvmax);
+ cu->clipMv(mvmin, m_referenceRowsAvailable);
+ cu->clipMv(mvmax, m_referenceRowsAvailable);
mvmin >>= 2;
mvmax >>= 2;
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h Mon Sep 02 16:16:04 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.h Tue Sep 03 15:47:01 2013 +0800
@@ -68,6 +68,7 @@
public:
x265::MotionEstimate m_me;
+ int m_referenceRowsAvailable;
protected:
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/Lib/TLibEncoder/TEncTop.cpp
--- a/source/Lib/TLibEncoder/TEncTop.cpp Mon Sep 02 16:16:04 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncTop.cpp Tue Sep 03 15:47:01 2013 +0800
@@ -268,7 +268,7 @@
}
// main encode processing, TBD multi-threading
- curEncoder->compressFrame(fenc);
+ curEncoder->m_enable.trigger();
}
return ret;
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/common/common.cpp
--- a/source/common/common.cpp Mon Sep 02 16:16:04 2013 +0530
+++ b/source/common/common.cpp Tue Sep 03 15:47:01 2013 +0800
@@ -260,6 +260,10 @@
CONFIRM(param->rc.rateControlMode<X265_RC_ABR || param->rc.rateControlMode> X265_RC_CRF,
"Rate control mode is out of range");
+ // TODO: with more frame threads than lookahead frames the POC ordering becomes ambiguous; keyframeMax is used as the bound because the minimal lookahead is keyframeMax
+ CONFIRM(param->frameNumThreads>param->keyframeMax,
+ "Frame Parallelism Threads must be less or equal to Lookahead(keyframeMax) frame number");
+
// max CU size should be power of 2
uint32_t ui = param->maxCUSize;
while (ui)
@@ -363,6 +367,7 @@
x265_log(param, X265_LOG_INFO, "RDpenalty : %d\n", param->rdPenalty);
}
x265_log(param, X265_LOG_INFO, "Lookahead len / -b / bAdapt : %d / %d / %d\n", param->lookaheadDepth, param->bframes, param->bFrameAdaptive);
+ x265_log(param, X265_LOG_INFO, "Frame parallelism thread : %d\n", param->frameNumThreads);
x265_log(param, X265_LOG_INFO, "tools: ");
#define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, "%s ", STR)
TOOLOPT(param->bEnableRectInter, "rect");
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp Mon Sep 02 16:16:04 2013 +0530
+++ b/source/common/ipfilter.cpp Tue Sep 03 15:47:01 2013 +0800
@@ -776,5 +776,6 @@
p.filterVwghtd = filterVerticalWeighted;
p.filterHwghtd = filterHorizontalWeighted;
+ p.extendRowBorder = extendCURowColBorder;
}
}
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/common/primitives.h
--- a/source/common/primitives.h Mon Sep 02 16:16:04 2013 +0530
+++ b/source/common/primitives.h Tue Sep 03 15:47:01 2013 +0800
@@ -226,6 +226,8 @@
typedef void (*filterRowH_t)(pixel *src, intptr_t srcStride, short* midA, short* midB, short* midC, intptr_t midStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow);
typedef void (*filterRowV_0_t)(pixel *src, intptr_t srcStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow);
typedef void (*filterRowV_N_t)(short *midA, intptr_t midStride, pixel *dstA, pixel *dstB, pixel *dstC, intptr_t dstStride, int width, int height, int marginX, int marginY, int row, int isLastRow);
+typedef void (*extendCURowBorder_t)(pixel* txt, intptr_t stride, int width, int height, int marginX);
+
typedef void (*weightpUni_t)(short *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
@@ -267,6 +269,8 @@
filterRowH_t filterRowH;
filterRowV_0_t filterRowV_0;
filterRowV_N_t filterRowV_N;
+ extendCURowBorder_t extendRowBorder;
+
intra_dc_t intra_pred_dc;
intra_planar_t intra_pred_planar;
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/common/wavefront.cpp
--- a/source/common/wavefront.cpp Mon Sep 02 16:16:04 2013 +0530
+++ b/source/common/wavefront.cpp Tue Sep 03 15:47:01 2013 +0800
@@ -41,6 +41,11 @@
m_queuedBitmap = new uint64_t[m_numWords];
if (m_queuedBitmap)
memset((void*)m_queuedBitmap, 0, sizeof(uint64_t) * m_numWords);
+
+ m_enableBitmap = new uint64_t[m_numWords];
+ if (m_enableBitmap)
+ memset((void*)m_enableBitmap, 0, sizeof(uint64_t) * m_numWords);
+
return m_queuedBitmap != NULL;
}
@@ -54,6 +59,11 @@
delete[] m_queuedBitmap;
m_queuedBitmap = NULL;
}
+ if (m_enableBitmap)
+ {
+ delete[] m_enableBitmap;
+ m_enableBitmap = NULL;
+ }
}
void WaveFront::enqueueRow(int row)
@@ -66,6 +76,15 @@
m_pool->pokeIdleThread();
}
+void WaveFront::enableRow(int row)
+{
+ // Thread safe: atomically sets the bit for 'row' in m_enableBitmap; unsigned 1 keeps the shift defined for bit 63
+ uint64_t bit = 1ULL << (row & 63);
+
+ assert(row >= 0 && row < m_numRows);
+ ATOMIC_OR(&m_enableBitmap[row >> 6], bit);
+}
+
bool WaveFront::checkHigherPriorityRow(int curRow)
{
int fullwords = curRow >> 6;
@@ -95,8 +114,14 @@
break;
CTZ64(id, oldval);
+
+ // NOTE: if the lowest queued row is not enabled yet, stop here instead of checking higher rows
+ if (!(m_enableBitmap[w] & (1LL << id)))
+ {
+ return false;
+ }
+
uint64_t newval = oldval & ~(1LL << id);
-
if (ATOMIC_CAS(&m_queuedBitmap[w], oldval, newval) == oldval)
{
// we cleared the bit, process row
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/common/wavefront.h
--- a/source/common/wavefront.h Mon Sep 02 16:16:04 2013 +0530
+++ b/source/common/wavefront.h Tue Sep 03 15:47:01 2013 +0800
@@ -40,6 +40,7 @@
// bitmap of rows queued for processing, uses atomic intrinsics
uint64_t volatile *m_queuedBitmap;
+ uint64_t volatile *m_enableBitmap;
// number of words in the bitmap
int m_numWords;
@@ -64,6 +65,8 @@
// This provider must be enqueued in the pool before enqueuing a row
void enqueueRow(int row);
+ void enableRow(int row);
+
// Returns true if a row above curRow is available for processing. The processRow()
// method may call this function periodically and voluntarily exit
bool checkHigherPriorityRow(int curRow);
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/encoder/dpb.cpp
--- a/source/encoder/dpb.cpp Mon Sep 02 16:16:04 2013 +0530
+++ b/source/encoder/dpb.cpp Tue Sep 03 15:47:01 2013 +0800
@@ -51,7 +51,6 @@
if (pic->getSlice()->isReferenced() == false && pic->m_countRefEncoders == 0)
{
pic->getPicYuvRec()->clearReferences();
- pic->getPicYuvRec()->clearExtendedFlag();
pic->m_reconRowCount = 0;
// iterator is invalidated by remove, restart scan
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Mon Sep 02 16:16:04 2013 +0530
+++ b/source/encoder/frameencoder.cpp Tue Sep 03 15:47:01 2013 +0800
@@ -55,6 +55,7 @@
, m_frameFilter(NULL)
, m_pic(NULL)
, m_rows(NULL)
+ , m_threadActive(true)
{
}
@@ -68,6 +69,10 @@
{
JobProvider::flush(); // ensure no worker threads are using this frame
+ // TODO: wait for the thread to exit
+ m_threadActive = false;
+ m_enable.trigger();
+
if (m_rows)
{
for (int i = 0; i < m_numRows; ++i)
@@ -79,6 +84,8 @@
}
m_frameFilter.destroy();
+
+ stop();
}
void FrameEncoder::init(TEncTop *top, int numRows)
@@ -143,6 +150,7 @@
printf("error : ScalingList == %d not supported\n", m_top->getUseScalingListId());
assert(0);
}
+ start();
}
int FrameEncoder::getStreamHeaders(AccessUnit& accessUnit)
@@ -874,7 +882,9 @@
m_rows[i].init();
m_rows[i].m_entropyCoder.setEntropyCoder(&m_sbacCoder, pic->getSlice());
m_rows[i].m_entropyCoder.resetEntropy();
+
m_rows[i].m_rdSbacCoders[0][CI_CURR_BEST]->load(&m_sbacCoder);
+
m_pic->m_complete_enc[i] = 0;
}
@@ -897,22 +907,15 @@
for (Int ref = 0; ref < slice->getNumRefIdx(list); ref++)
{
TComPic *refpic = slice->getRefPic(list, ref);
- while (refpic->m_reconRowCount <= (UInt) row)
+ while ((refpic->m_reconRowCount != (UInt)m_numRows) && (refpic->m_reconRowCount < (UInt) row + 2))
refpic->m_reconRowWait.wait();
min = X265_MIN(min, refpic->m_reconRowCount);
}
}
m_referenceRowsAvailable = min;
- row = min;
-#if 0 // incomplete signaling of available recon reference rows
- if (row > 0)
- {
- if (!m_rows[row + 1].m_active && (row == 1 || m_pic->m_complete_enc[row - 2] > 1))
- WaveFront::enqueueRow(row - 1);
- }
-#endif
+ WaveFront::enableRow(row);
}
WaveFront::enqueueRow(0);
@@ -959,6 +962,7 @@
codeRow.m_entropyCoder.setEntropyCoder(&m_sbacCoder, m_pic->getSlice());
codeRow.m_entropyCoder.resetEntropy();
+ codeRow.m_search.m_referenceRowsAvailable = m_referenceRowsAvailable;
TEncSbac *bufSbac = (m_cfg->param.bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
codeRow.processCU(cu, m_pic->getSlice(), bufSbac, m_cfg->param.bEnableWavefront && col == 1);
@@ -1015,6 +1019,7 @@
if (m_pic)
{
/* TODO: frame parallelism - block here until worker thread completes */
+ m_done.wait();
TComPic *ret = m_pic;
m_pic = NULL;
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Mon Sep 02 16:16:04 2013 +0530
+++ b/source/encoder/frameencoder.h Tue Sep 03 15:47:01 2013 +0800
@@ -50,7 +50,7 @@
class ThreadPool;
// Manages the wave-front processing of a single encoding frame
-class FrameEncoder : public WaveFront
+class FrameEncoder : public WaveFront, public x265::Thread
{
public:
@@ -180,6 +180,23 @@
TComPic *getEncodedPicture(AccessUnit& accessUnit);
+ // Frame parallelism: presumably the x265::Thread entry point; runs compressFrame(m_pic) once per m_enable signal
+ void threadMain(void)
+ {
+ while(m_threadActive)
+ {
+ m_enable.wait(); // block until m_enable is triggered (frame submitted, or shutdown requested)
+ if (!m_threadActive) // flag was cleared while waiting: leave without encoding
+ break;
+ compressFrame(m_pic);
+ m_done.trigger(); // signal m_done so a waiter on it (see getEncodedPicture) can proceed
+ }
+ }
+
+ Event m_enable;
+ Event m_done;
+ bool m_threadActive;
+
SEIWriter m_seiWriter;
TComSPS m_sps;
TComPPS m_pps;
@@ -199,7 +216,7 @@
/* Picture being encoded, and its output NAL list */
TComPic* m_pic;
AccessUnit m_accessUnit;
- int m_referenceRowsAvailable;
+ volatile int m_referenceRowsAvailable;
int m_numRows;
int row_delay;
diff -r 2f9fcf768918 -r bc1887f2bbc4 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Mon Sep 02 16:16:04 2013 +0530
+++ b/source/encoder/framefilter.cpp Tue Sep 03 15:47:01 2013 +0800
@@ -226,10 +226,44 @@
// TODO: extend margins for motion reference
- // Notify other FrameEncoders that this row of reconstructed pixels is available
- m_pic->m_reconRowCount++;
- for (UInt i = 0; i < m_pic->m_countRefEncoders; i++)
- m_pic->m_reconRowWait.trigger();
+ TComPicYuv *recon = m_pic->getPicYuvRec();
+ if (row > 0)
+ {
+ // TODO: Remove when we confirm below code is right
+ //recon->xExtendPicCompBorder(recon->getLumaAddr(), recon->getStride(), recon->getWidth(), recon->getHeight(), recon->m_lumaMarginX, recon->m_lumaMarginY);
+ //recon->xExtendPicCompBorder(recon->getCbAddr(), recon->getCStride(), recon->getWidth() >> 1, recon->getHeight() >> 1, recon->m_chromaMarginX, recon->m_chromaMarginY);
+ //recon->xExtendPicCompBorder(recon->getCrAddr(), recon->getCStride(), recon->getWidth() >> 1, recon->getHeight() >> 1, recon->m_chromaMarginX, recon->m_chromaMarginY);
+ // Border extend Left and Right
+ primitives.extendRowBorder(recon->getLumaAddr(lineStartCUAddr - numCols), recon->getStride(), recon->getWidth(), g_maxCUHeight, recon->getLumaMarginX());
+ primitives.extendRowBorder(recon->getCbAddr(lineStartCUAddr - numCols), recon->getCStride(), recon->getWidth() >> 1, g_maxCUHeight >> 1, recon->getChromaMarginX());
+ primitives.extendRowBorder(recon->getCrAddr(lineStartCUAddr - numCols), recon->getCStride(), recon->getWidth() >> 1, g_maxCUHeight >> 1, recon->getChromaMarginX());
+
+ // Border extend Top
+ if (row == 1)
+ {
+ const intptr_t stride = recon->getStride();
+ const intptr_t strideC = recon->getCStride();
+ Pel *pixY = recon->getLumaAddr(lineStartCUAddr - numCols) - recon->getLumaMarginX();
+ Pel *pixU = recon->getCbAddr(lineStartCUAddr - numCols) - recon->getChromaMarginX();
+ Pel *pixV = recon->getCrAddr(lineStartCUAddr - numCols) - recon->getChromaMarginX();
+
+ for(int y = 0; y < recon->getLumaMarginY(); y++)
+ {
+ memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(Pel));
+ }
+
+ for(int y = 0; y < recon->getChromaMarginY(); y++)
+ {
+ memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(Pel));
+ memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(Pel));
+ }
+ }
+
+ // Notify other FrameEncoders that this row of reconstructed pixels is available
+ m_pic->m_reconRowCount++;
+ for (UInt i = 0; i < m_pic->m_countRefEncoders; i++)
+ m_pic->m_reconRowWait.trigger();
+ }
if (row == m_numRows - 1)
{
@@ -247,5 +281,40 @@
}
m_completionEvent.trigger();
+
+ // TODO: Remove when we confirm below code is right
+ //recon->xExtendPicCompBorder(recon->getLumaAddr(), recon->getStride(), recon->getWidth(), recon->getHeight(), recon->m_lumaMarginX, recon->m_lumaMarginY);
+ //recon->xExtendPicCompBorder(recon->getCbAddr(), recon->getCStride(), recon->getWidth() >> 1, recon->getHeight() >> 1, recon->m_chromaMarginX, recon->m_chromaMarginY);
+ //recon->xExtendPicCompBorder(recon->getCrAddr(), recon->getCStride(), recon->getWidth() >> 1, recon->getHeight() >> 1, recon->m_chromaMarginX, recon->m_chromaMarginY);
+ // Border extend Left and Right
+ const int realH = ((recon->getHeight() % g_maxCUHeight) ? (recon->getHeight() % g_maxCUHeight) : g_maxCUHeight);
+ primitives.extendRowBorder(recon->getLumaAddr(lineStartCUAddr), recon->getStride(), recon->getWidth(), realH, recon->getLumaMarginX());
+ primitives.extendRowBorder(recon->getCbAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> 1, realH >> 1, recon->getChromaMarginX());
+ primitives.extendRowBorder(recon->getCrAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> 1, realH >> 1, recon->getChromaMarginX());
+
+ // Border extend Bottom
+ {
+ const intptr_t stride = recon->getStride();
+ const intptr_t strideC = recon->getCStride();
+ Pel *pixY = recon->getLumaAddr(lineStartCUAddr) - recon->getLumaMarginX() + (realH - 1) * stride;
+ Pel *pixU = recon->getCbAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> 1) - 1) * strideC;
+ Pel *pixV = recon->getCrAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> 1) - 1) * strideC;
+
+ for(int y = 0; y < recon->getLumaMarginY(); y++)
+ {
+ memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(Pel));
+ }
+
+ for(int y = 0; y < recon->getChromaMarginY(); y++)
+ {
+ memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(Pel));
+ memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(Pel));
+ }
+ }
+
+ // Notify other FrameEncoders that this row of reconstructed pixels is available
+ m_pic->m_reconRowCount++;
+ for (UInt i = 0; i < m_pic->m_countRefEncoders; i++)
+ m_pic->m_reconRowWait.trigger();
}
}
More information about the x265-devel
mailing list