[x265] [PATCH] framepp: Active frame parallelism

Min Chen chenm003 at 163.com
Tue Sep 3 07:52:41 CEST 2013


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1378187513 -28800
# Node ID 0551520fa4703a1f927b0baa4de905d84aaa106f
# Parent  3ea029900ab3ee58ed6b16c5c5a0a89975ba8c03
framepp: Active frame parallelism

diff -r 3ea029900ab3 -r 0551520fa470 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Tue Sep 03 13:51:53 2013 +0800
@@ -2715,14 +2715,17 @@
     return false;
 }
 
-Void TComDataCU::clipMv(MV& outMV)
+Void TComDataCU::clipMv(MV& outMV, int rowsAvailable)
 {
     Int mvshift = 2;
     Int offset = 8;
     Int xmax = (m_slice->getSPS()->getPicWidthInLumaSamples() + offset - m_cuPelX - 1) << mvshift;
     Int xmin = (-(Int)g_maxCUWidth - offset - (Int)m_cuPelX + 1) << mvshift;
 
-    Int ymax = (m_slice->getSPS()->getPicHeightInLumaSamples() + offset - m_cuPelY - 1) << mvshift;
+    int ylimit = m_slice->getSPS()->getPicHeightInLumaSamples();
+    if (rowsAvailable)
+        ylimit = X265_MIN(rowsAvailable * g_maxCUHeight, ylimit);
+    Int ymax = (ylimit + offset - m_cuPelY - 1) << mvshift;
     Int ymin = (-(Int)g_maxCUHeight - offset - (Int)m_cuPelY + 1) << mvshift;
 
     outMV.x = min(xmax, max(xmin, (Int)outMV.x));
diff -r 3ea029900ab3 -r 0551520fa470 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.h	Tue Sep 03 13:51:53 2013 +0800
@@ -448,7 +448,7 @@
     Void          setMVPIdxSubParts(Int mvpIdx, RefPicList picList, UInt absPartIdx, UInt partIdx, UInt depth);
     Void          setMVPNumSubParts(Int iMVPNum, RefPicList picList, UInt absPartIdx, UInt partIdx, UInt depth);
 
-    Void          clipMv(x265::MV& outMV);
+    Void          clipMv(x265::MV& outMV, int rowsAvailable = 0);
 
     Void          getMvPredLeft(x265::MV& mvPred)       { mvPred = m_mvFieldA.mv; }
 
diff -r 3ea029900ab3 -r 0551520fa470 source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp	Tue Sep 03 13:51:53 2013 +0800
@@ -60,8 +60,6 @@
     m_picOrgV = NULL;
 
     m_refList = NULL;
-
-    m_bIsBorderExtended = false;
 }
 
 TComPicYuv::~TComPicYuv()
@@ -98,8 +96,6 @@
     m_picOrgU = m_picBufU + m_chromaMarginY * getCStride() + m_chromaMarginX;
     m_picOrgV = m_picBufV + m_chromaMarginY * getCStride() + m_chromaMarginX;
 
-    m_bIsBorderExtended = false;
-
     m_cuOffsetY = new Int[numCuInWidth * numCuInHeight];
     m_cuOffsetC = new Int[numCuInWidth * numCuInHeight];
     for (Int cuRow = 0; cuRow < numCuInHeight; cuRow++)
@@ -240,14 +236,8 @@
 
 x265::MotionReference* TComPicYuv::generateMotionReference(wpScalingParam *w)
 {
-    if (!m_bIsBorderExtended)
-    {
-        /* HPEL generation requires luma integer plane to already be extended */
-        xExtendPicCompBorder(getLumaAddr(), getStride(), getWidth(), getHeight(), m_lumaMarginX, m_lumaMarginY);
-        xExtendPicCompBorder(getCbAddr(), getCStride(), getWidth() >> 1, getHeight() >> 1, m_chromaMarginX, m_chromaMarginY);
-        xExtendPicCompBorder(getCrAddr(), getCStride(), getWidth() >> 1, getHeight() >> 1, m_chromaMarginX, m_chromaMarginY);
-        m_bIsBorderExtended = true;
-    }
+    /* HPEL generation requires luma integer plane to already be extended */
+    // NOTE: borders are now extended after every CU row, so the extension code was removed from here
 
     MotionReference *mref;
     for (mref = m_refList; mref != NULL; mref = mref->m_next)
diff -r 3ea029900ab3 -r 0551520fa470 source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Tue Sep 03 13:51:53 2013 +0800
@@ -94,8 +94,6 @@
     Int   m_stride;
     Int   m_strideC;
 
-    Bool  m_bIsBorderExtended;
-
 public:
     Int   m_numCuInWidth;
     Int   m_numCuInHeight;
@@ -132,7 +130,9 @@
 
     Int   getLumaMarginY() { return m_lumaMarginY; }
 
-    Int   getChromaMargin() { return m_chromaMarginX; }
+    Int   getChromaMarginX() { return m_chromaMarginX; }
+
+    Int   getChromaMarginY() { return m_chromaMarginY; }
 
     // ------------------------------------------------------------------------------------------------
     //  Access function for picture buffer
@@ -181,9 +181,6 @@
     //  Dump picture
     Void  dump(Char* pFileName, Bool bAdd = false);
 
-    // Set border extension flag
-    Void  clearExtendedFlag() { m_bIsBorderExtended = false; }
-
     friend class x265::MotionReference;
 }; // END CLASS DEFINITION TComPicYuv
 
diff -r 3ea029900ab3 -r 0551520fa470 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.cpp	Tue Sep 03 13:51:53 2013 +0800
@@ -452,7 +452,6 @@
     if (cu->getSlice()->getSliceType() != I_SLICE)
         fprintf(fp1, "\n CU number : %d ", totalCU);
 #endif
-    //printf("compressCU[%2d]: Best=0x%08X, Temp=0x%08X\n", omp_get_thread_num(), m_ppcBestCU[0], m_ppcTempCU[0]);
 
     m_addSADDepth      = 0;
     m_LCUPredictionSAD = 0;
diff -r 3ea029900ab3 -r 0551520fa470 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Tue Sep 03 13:51:53 2013 +0800
@@ -2946,6 +2946,7 @@
 UInt TEncSearch::xGetTemplateCost(TComDataCU* cu, UInt partAddr, TComYuv* templateCand, MV mvCand, Int mvpIdx,
                                   Int mvpCandCount, RefPicList picList, Int refIdx, Int sizex, Int sizey)
 {
+    // TODO: should this also clip with m_referenceRowsAvailable?
     cu->clipMv(mvCand);
 
     // prediction pattern
@@ -3013,8 +3014,8 @@
     mvmin = mvp - dist;
     mvmax = mvp + dist;
 
-    cu->clipMv(mvmin);
-    cu->clipMv(mvmax);
+    cu->clipMv(mvmin, m_referenceRowsAvailable);
+    cu->clipMv(mvmax, m_referenceRowsAvailable);
 
     mvmin >>= 2;
     mvmax >>= 2;
diff -r 3ea029900ab3 -r 0551520fa470 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Tue Sep 03 13:51:53 2013 +0800
@@ -68,6 +68,7 @@
 public:
 
     x265::MotionEstimate m_me;
+    int             m_referenceRowsAvailable;
 
 protected:
 
diff -r 3ea029900ab3 -r 0551520fa470 source/Lib/TLibEncoder/TEncTop.cpp
--- a/source/Lib/TLibEncoder/TEncTop.cpp	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncTop.cpp	Tue Sep 03 13:51:53 2013 +0800
@@ -268,7 +268,7 @@
         }
 
         // main encode processing, TBD multi-threading
-        curEncoder->compressFrame(fenc);
+        curEncoder->m_enable.trigger();
     }
 
     return ret;
diff -r 3ea029900ab3 -r 0551520fa470 source/common/common.cpp
--- a/source/common/common.cpp	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/common/common.cpp	Tue Sep 03 13:51:53 2013 +0800
@@ -260,6 +260,10 @@
     CONFIRM(param->rc.rateControlMode<X265_RC_ABR || param->rc.rateControlMode> X265_RC_CRF,
             "Rate control mode is out of range");
 
+    // TODO: beyond this limit the POC handling becomes ambiguous; keyframeMax is used as the bound because the minimal lookahead is keyframeMax
+    CONFIRM(param->frameNumThreads>param->keyframeMax,
+            "Frame Parallelism Threads must be less or equal to Lookahead(keyframeMax) frame number");
+
     // max CU size should be power of 2
     uint32_t ui = param->maxCUSize;
     while (ui)
@@ -363,6 +367,7 @@
         x265_log(param, X265_LOG_INFO, "RDpenalty                    : %d\n", param->rdPenalty);
     }
     x265_log(param, X265_LOG_INFO, "Lookahead len / -b / bAdapt  : %d / %d / %d\n", param->lookaheadDepth, param->bframes, param->bFrameAdaptive);
+    x265_log(param, X265_LOG_INFO, "Frame parallelism thread     : %d\n", param->frameNumThreads);
     x265_log(param, X265_LOG_INFO, "tools: ");
 #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, "%s ", STR)
     TOOLOPT(param->bEnableRectInter, "rect");
diff -r 3ea029900ab3 -r 0551520fa470 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/common/ipfilter.cpp	Tue Sep 03 13:51:53 2013 +0800
@@ -777,5 +777,7 @@
     p.filterHwghtd = filterHorizontalWeighted;
     
     p.filterHCU = filterHorizontalExtendCol;
+
+    p.extendRowBorder = extendCURowColBorder;
 }
 }
diff -r 3ea029900ab3 -r 0551520fa470 source/common/primitives.h
--- a/source/common/primitives.h	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/common/primitives.h	Tue Sep 03 13:51:53 2013 +0800
@@ -226,6 +226,8 @@
 typedef void (*filterRowH_t)(pixel *src, intptr_t srcStride, short* midA, short* midB, short* midC, intptr_t midStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow);
 typedef void (*filterRowV_0_t)(pixel *src, intptr_t srcStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow);
 typedef void (*filterRowV_N_t)(short *midA, intptr_t midStride, pixel *dstA, pixel *dstB, pixel *dstC, intptr_t dstStride, int width, int height, int marginX, int marginY, int row, int isLastRow);
+typedef void (*extendCURowBorder_t)(pixel* txt, intptr_t stride, int width, int height, int marginX);
+
 
 typedef void (*weightpUni_t)(short *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
 typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
@@ -269,6 +271,8 @@
     filterRowH_t    filterRowH;
     filterRowV_0_t  filterRowV_0;
     filterRowV_N_t  filterRowV_N;
+    extendCURowBorder_t extendRowBorder;
+
 
     intra_dc_t      intra_pred_dc;
     intra_planar_t  intra_pred_planar;
diff -r 3ea029900ab3 -r 0551520fa470 source/common/wavefront.cpp
--- a/source/common/wavefront.cpp	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/common/wavefront.cpp	Tue Sep 03 13:51:53 2013 +0800
@@ -41,6 +41,11 @@
         m_queuedBitmap = new uint64_t[m_numWords];
         if (m_queuedBitmap)
             memset((void*)m_queuedBitmap, 0, sizeof(uint64_t) * m_numWords);
+
+        m_enableBitmap = new uint64_t[m_numWords];
+        if (m_enableBitmap)
+            memset((void*)m_enableBitmap, 0, sizeof(uint64_t) * m_numWords);
+        
         return m_queuedBitmap != NULL;
     }
 
@@ -54,6 +59,11 @@
         delete[] m_queuedBitmap;
         m_queuedBitmap = NULL;
     }
+    if (m_enableBitmap)
+    {
+        delete[] m_enableBitmap;
+        m_enableBitmap = NULL;
+    }
 }
 
 void WaveFront::enqueueRow(int row)
@@ -66,6 +76,15 @@
     m_pool->pokeIdleThread();
 }
 
+void WaveFront::enableRow(int row)
+{
+    // thread safe: marks `row` as enabled by atomically OR-ing its bit into m_enableBitmap
+    uint64_t bit = 1LL << (row & 63); // bit position of the row inside its 64-bit word
+
+    assert(row < m_numRows);
+    ATOMIC_OR(&m_enableBitmap[row >> 6], bit); // row >> 6 selects the 64-bit word holding this row
+}
+
 bool WaveFront::checkHigherPriorityRow(int curRow)
 {
     int fullwords = curRow >> 6;
@@ -95,8 +114,14 @@
                 break;
 
             CTZ64(id, oldval);
+
+            // NOTE: if the lowest queued row is not yet enabled, do not look at higher rows
+            if (!(m_enableBitmap[w] & (1LL << id)))
+            {
+                return false;
+            }
+
             uint64_t newval = oldval & ~(1LL << id);
-
             if (ATOMIC_CAS(&m_queuedBitmap[w], oldval, newval) == oldval)
             {
                 // we cleared the bit, process row
diff -r 3ea029900ab3 -r 0551520fa470 source/common/wavefront.h
--- a/source/common/wavefront.h	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/common/wavefront.h	Tue Sep 03 13:51:53 2013 +0800
@@ -40,6 +40,7 @@
 
     // bitmap of rows queued for processing, uses atomic intrinsics
     uint64_t volatile *m_queuedBitmap;
+    uint64_t volatile *m_enableBitmap;
 
     // number of words in the bitmap
     int m_numWords;
@@ -64,6 +65,8 @@
     // This provider must be enqueued in the pool before enqueuing a row
     void enqueueRow(int row);
 
+    void enableRow(int row);
+
     // Returns true if a row above curRow is available for processing.  The processRow()
     // method may call this function periodically and voluntarily exit
     bool checkHigherPriorityRow(int curRow);
diff -r 3ea029900ab3 -r 0551520fa470 source/encoder/dpb.cpp
--- a/source/encoder/dpb.cpp	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/encoder/dpb.cpp	Tue Sep 03 13:51:53 2013 +0800
@@ -51,7 +51,6 @@
         if (pic->getSlice()->isReferenced() == false && pic->m_countRefEncoders == 0)
         {
             pic->getPicYuvRec()->clearReferences();
-            pic->getPicYuvRec()->clearExtendedFlag();
             pic->m_reconRowCount = 0;
 
             // iterator is invalidated by remove, restart scan
diff -r 3ea029900ab3 -r 0551520fa470 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/encoder/frameencoder.cpp	Tue Sep 03 13:51:53 2013 +0800
@@ -55,6 +55,7 @@
     , m_frameFilter(NULL)
     , m_pic(NULL)
     , m_rows(NULL)
+    , m_threadActive(true)
 {
 }
 
@@ -68,6 +69,10 @@
 {
     JobProvider::flush();  // ensure no worker threads are using this frame
 
+    // TODO: wait for the thread to exit
+    m_threadActive = false;
+    m_enable.trigger();
+
     if (m_rows)
     {
         for (int i = 0; i < m_numRows; ++i)
@@ -79,6 +84,8 @@
     }
 
     m_frameFilter.destroy();
+
+    stop();
 }
 
 void FrameEncoder::init(TEncTop *top, int numRows)
@@ -143,6 +150,7 @@
         printf("error : ScalingList == %d not supported\n", m_top->getUseScalingListId());
         assert(0);
     }
+    start();
 }
 
 int FrameEncoder::getStreamHeaders(AccessUnit& accessUnit)
@@ -874,7 +882,9 @@
         m_rows[i].init();
         m_rows[i].m_entropyCoder.setEntropyCoder(&m_sbacCoder, pic->getSlice());
         m_rows[i].m_entropyCoder.resetEntropy();
+
         m_rows[i].m_rdSbacCoders[0][CI_CURR_BEST]->load(&m_sbacCoder);
+
         m_pic->m_complete_enc[i] = 0;
     }
 
@@ -897,22 +907,15 @@
                 for (Int ref = 0; ref < slice->getNumRefIdx(list); ref++)
                 {
                     TComPic *refpic = slice->getRefPic(list, ref);
-                    while (refpic->m_reconRowCount <= (UInt) row)
+                    while ((refpic->m_reconRowCount != (UInt)m_numRows) && (refpic->m_reconRowCount < (UInt) row + 2))
                         refpic->m_reconRowWait.wait();
                     min = X265_MIN(min, refpic->m_reconRowCount);
                 }
             }
 
             m_referenceRowsAvailable = min;
-            row = min;
 
-#if 0 // incomplete signaling of available recon reference rows
-            if (row > 0)
-            {
-                if (!m_rows[row + 1].m_active && (row == 1 || m_pic->m_complete_enc[row - 2] > 1))
-                    WaveFront::enqueueRow(row - 1);
-            }
-#endif
+            WaveFront::enableRow(row);
         }
 
         WaveFront::enqueueRow(0);
@@ -959,6 +962,7 @@
         codeRow.m_entropyCoder.setEntropyCoder(&m_sbacCoder, m_pic->getSlice());
         codeRow.m_entropyCoder.resetEntropy();
 
+        codeRow.m_search.m_referenceRowsAvailable = m_referenceRowsAvailable;
         TEncSbac *bufSbac = (m_cfg->param.bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
         codeRow.processCU(cu, m_pic->getSlice(), bufSbac, m_cfg->param.bEnableWavefront && col == 1);
 
@@ -1015,6 +1019,7 @@
     if (m_pic)
     {
         /* TODO: frame parallelism - block here until worker thread completes */
+        m_done.wait();
 
         TComPic *ret = m_pic;
         m_pic = NULL;
diff -r 3ea029900ab3 -r 0551520fa470 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/encoder/frameencoder.h	Tue Sep 03 13:51:53 2013 +0800
@@ -50,7 +50,7 @@
 class ThreadPool;
 
 // Manages the wave-front processing of a single encoding frame
-class FrameEncoder : public WaveFront
+class FrameEncoder : public WaveFront, public x265::Thread
 {
 public:
 
@@ -180,6 +180,23 @@
 
     TComPic *getEncodedPicture(AccessUnit& accessUnit);
 
+    // Frame parallelism: thread loop that waits on m_enable, runs compressFrame(m_pic), then signals m_done
+    void threadMain(void)
+    {
+        while(m_threadActive)
+        {
+            m_enable.wait();
+            if (!m_threadActive) // re-check after waking: m_enable is also triggered after m_threadActive is cleared
+                break;
+            compressFrame(m_pic);
+            m_done.trigger();
+        }
+    }
+
+    Event                    m_enable;
+    Event                    m_done;
+    bool                     m_threadActive;
+
     SEIWriter                m_seiWriter;
     TComSPS                  m_sps;
     TComPPS                  m_pps;
@@ -199,7 +216,7 @@
     /* Picture being encoded, and its output NAL list */
     TComPic*                 m_pic;
     AccessUnit               m_accessUnit;
-    int                      m_referenceRowsAvailable;
+    volatile int             m_referenceRowsAvailable;
 
     int                      m_numRows;
     int                      row_delay;
diff -r 3ea029900ab3 -r 0551520fa470 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Mon Sep 02 12:40:15 2013 +0530
+++ b/source/encoder/framefilter.cpp	Tue Sep 03 13:51:53 2013 +0800
@@ -226,10 +226,44 @@
 
     // TODO: extend margins for motion reference
 
-    // Notify other FrameEncoders that this row of reconstructed pixels is available
-    m_pic->m_reconRowCount++;
-    for (UInt i = 0; i < m_pic->m_countRefEncoders; i++)
-        m_pic->m_reconRowWait.trigger();
+    TComPicYuv *recon = m_pic->getPicYuvRec();
+    if (row > 0)
+    {
+        // TODO: remove the commented-out calls below once the new code is confirmed correct
+        //recon->xExtendPicCompBorder(recon->getLumaAddr(), recon->getStride(), recon->getWidth(), recon->getHeight(), recon->m_lumaMarginX, recon->m_lumaMarginY);
+        //recon->xExtendPicCompBorder(recon->getCbAddr(), recon->getCStride(), recon->getWidth() >> 1, recon->getHeight() >> 1, recon->m_chromaMarginX, recon->m_chromaMarginY);
+        //recon->xExtendPicCompBorder(recon->getCrAddr(), recon->getCStride(), recon->getWidth() >> 1, recon->getHeight() >> 1, recon->m_chromaMarginX, recon->m_chromaMarginY);
+        // Border extend Left and Right
+        primitives.extendRowBorder(recon->getLumaAddr(lineStartCUAddr - numCols), recon->getStride(), recon->getWidth(), g_maxCUHeight, recon->getLumaMarginX());
+        primitives.extendRowBorder(recon->getCbAddr(lineStartCUAddr - numCols), recon->getCStride(), recon->getWidth() >> 1, g_maxCUHeight >> 1, recon->getChromaMarginX());
+        primitives.extendRowBorder(recon->getCrAddr(lineStartCUAddr - numCols), recon->getCStride(), recon->getWidth() >> 1, g_maxCUHeight >> 1, recon->getChromaMarginX());
+
+        // Border extend Top
+        if (row == 1)
+        {
+            const intptr_t stride = recon->getStride();
+            const intptr_t strideC = recon->getCStride();
+            Pel *pixY = recon->getLumaAddr(lineStartCUAddr - numCols) - recon->getLumaMarginX();
+            Pel *pixU = recon->getCbAddr(lineStartCUAddr - numCols) - recon->getChromaMarginX();
+            Pel *pixV = recon->getCrAddr(lineStartCUAddr - numCols) - recon->getChromaMarginX();
+
+            for(int y = 0; y < recon->getLumaMarginY(); y++)
+            {
+                memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(Pel));
+            }
+
+            for(int y = 0; y < recon->getChromaMarginY(); y++)
+            {
+                memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(Pel));
+                memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(Pel));
+            }
+        }
+
+        // Notify other FrameEncoders that this row of reconstructed pixels is available
+        m_pic->m_reconRowCount++;
+        for (UInt i = 0; i < m_pic->m_countRefEncoders; i++)
+            m_pic->m_reconRowWait.trigger();
+    }
 
     if (row == m_numRows - 1)
     {
@@ -247,5 +281,40 @@
         }
 
         m_completionEvent.trigger();
+
+        // TODO: remove the commented-out calls below once the new code is confirmed correct
+        //recon->xExtendPicCompBorder(recon->getLumaAddr(), recon->getStride(), recon->getWidth(), recon->getHeight(), recon->m_lumaMarginX, recon->m_lumaMarginY);
+        //recon->xExtendPicCompBorder(recon->getCbAddr(), recon->getCStride(), recon->getWidth() >> 1, recon->getHeight() >> 1, recon->m_chromaMarginX, recon->m_chromaMarginY);
+        //recon->xExtendPicCompBorder(recon->getCrAddr(), recon->getCStride(), recon->getWidth() >> 1, recon->getHeight() >> 1, recon->m_chromaMarginX, recon->m_chromaMarginY);
+        // Border extend Left and Right
+        const int realH = ((recon->getHeight() % g_maxCUHeight) ? (recon->getHeight() % g_maxCUHeight) : g_maxCUHeight);
+        primitives.extendRowBorder(recon->getLumaAddr(lineStartCUAddr), recon->getStride(), recon->getWidth(), realH, recon->getLumaMarginX());
+        primitives.extendRowBorder(recon->getCbAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> 1, realH >> 1, recon->getChromaMarginX());
+        primitives.extendRowBorder(recon->getCrAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> 1, realH >> 1, recon->getChromaMarginX());
+
+        // Border extend Bottom
+        {
+            const intptr_t stride = recon->getStride();
+            const intptr_t strideC = recon->getCStride();
+            Pel *pixY = recon->getLumaAddr(lineStartCUAddr) - recon->getLumaMarginX() + (realH - 1) * stride;
+            Pel *pixU = recon->getCbAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> 1) - 1) * strideC;
+            Pel *pixV = recon->getCrAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> 1) - 1) * strideC;
+
+            for(int y = 0; y < recon->getLumaMarginY(); y++)
+            {
+                memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(Pel));
+            }
+
+            for(int y = 0; y < recon->getChromaMarginY(); y++)
+            {
+                memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(Pel));
+                memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(Pel));
+            }
+        }
+
+        // Notify other FrameEncoders that this row of reconstructed pixels is available
+        m_pic->m_reconRowCount++;
+        for (UInt i = 0; i < m_pic->m_countRefEncoders; i++)
+            m_pic->m_reconRowWait.trigger();
     }
 }



More information about the x265-devel mailing list