[x265] [PATCH] refactor on FrameFilter and ParallelFilter, reduce duplicated data field

Deepthi Nandakumar deepthi at multicorewareinc.com
Wed Jan 20 04:56:07 CET 2016


Excellent, thanks Min. This is exactly what I had in mind.

On Tue, Jan 19, 2016 at 5:30 PM, Min Chen <chenm003 at 163.com> wrote:

> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1453204681 -28800
> # Node ID 08dadacfe2cddfdea2c3a1e6f523c17ffa74bf09
> # Parent  765864c3c6f02e2a3ec426974de7df7bbec7de58
> refactor on FrameFilter and ParallelFilter, reduce duplicated data field
> ---
>  source/encoder/framefilter.cpp |  113
> +++++++++++++++++++--------------------
>  source/encoder/framefilter.h   |   32 ++++-------
>  2 files changed, 67 insertions(+), 78 deletions(-)
>
> diff -r 765864c3c6f0 -r 08dadacfe2cd source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp    Tue Jan 19 19:28:58 2016 +0800
> +++ b/source/encoder/framefilter.cpp    Tue Jan 19 19:58:01 2016 +0800
> @@ -57,12 +57,14 @@
>      m_param = top->m_param;
>      m_frameEncoder = frame;
>      m_numRows = numRows;
> +    m_numCols = numCols;
>      m_hChromaShift = CHROMA_H_SHIFT(m_param->internalCsp);
>      m_vChromaShift = CHROMA_V_SHIFT(m_param->internalCsp);
>      m_pad[0] = top->m_sps.conformanceWindow.rightOffset;
>      m_pad[1] = top->m_sps.conformanceWindow.bottomOffset;
>      m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;
>      m_lastHeight = (m_param->sourceHeight % g_maxCUSize) ?
> (m_param->sourceHeight % g_maxCUSize) : g_maxCUSize;
> +    m_lastWidth = (m_param->sourceWidth % g_maxCUSize) ?
> (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;
>
>      if (m_param->bEnableSsim)
>          m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));
> @@ -86,18 +88,13 @@
>              }
>          }
>
> -        const int lastWidth = (m_param->sourceWidth % g_maxCUSize) ?
> (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;
>          for(int row = 0; row < numRows; row++)
>          {
>              // Setting maximum bound information
> -            m_parallelFilter[row].m_numCols = numCols;
> -            m_parallelFilter[row].m_numRows = numRows;
>              m_parallelFilter[row].m_rowHeight = (row == numRows - 1) ?
> m_lastHeight : g_maxCUSize;
> -            m_parallelFilter[row].m_lastWidth = lastWidth;
> -            m_parallelFilter[row].m_param = m_param;
>              m_parallelFilter[row].m_row = row;
>              m_parallelFilter[row].m_rowAddr = row * numCols;
> -            m_parallelFilter[row].m_frameEncoder = m_frameEncoder;
> +            m_parallelFilter[row].m_frameFilter = this;
>
>              if (row > 0)
>                  m_parallelFilter[row].m_prevRow = &m_parallelFilter[row -
> 1];
> @@ -122,7 +119,6 @@
>              m_parallelFilter[row].m_allowedCol.set(0);
>              m_parallelFilter[row].m_lastDeblocked.set(-1);
>              m_parallelFilter[row].m_encData = frame->m_encData;
> -            m_parallelFilter[row].m_frame = frame;
>          }
>
>          // Reset SAO common statistics
> @@ -189,7 +185,7 @@
>      X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth *
> ctuWidth, "m_tmpU buffer beyond bound write detected");
>
>      // Chroma
> -    if (m_param->internalCsp != X265_CSP_I400)
> +    if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
>      {
>          ctuWidth  >>= m_sao.m_hChromaShift;
>
> @@ -213,30 +209,30 @@
>
>      if (m_encData->m_slice->m_pps->bTransquantBypassEnabled)
>      {
> -        const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
> -        const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
> +        const CUGeom* cuGeoms = m_frameFilter->m_frameEncoder->m_cuGeoms;
> +        const uint32_t* ctuGeomMap =
> m_frameFilter->m_frameEncoder->m_ctuGeomMap;
>
>          uint32_t cuAddr = m_rowAddr + col;
>          const CUData* ctu = m_encData->getPicCTU(cuAddr);
> -        assert(m_frame->m_reconPic == m_encData->m_reconPic);
> -        origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]],
> *m_frame);
> +        assert(m_frameFilter->m_frame->m_reconPic ==
> m_encData->m_reconPic);
> +        origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]],
> *m_frameFilter->m_frame);
>      }
>  }
>
>  // NOTE: MUST BE delay a row when Deblock enabled, the Deblock will
> modify above pixels in Horizon pass
> -void FrameFilter::ParallelFilter::processPostCu(uint32_t col) const
> +void FrameFilter::ParallelFilter::processPostCu(int col) const
>  {
>      // Update finished CU cursor
> -    m_frame->m_reconColCount[m_row].set(col);
> +    m_frameFilter->m_frame->m_reconColCount[m_row].set(col);
>
>      // shortcut path for non-border area
> -    if ((col != 0) & (col != m_numCols - 1) & (m_row != 0) & (m_row !=
> m_numRows - 1))
> +    if ((col != 0) & (col != m_frameFilter->m_numCols - 1) & (m_row != 0)
> & (m_row != m_frameFilter->m_numRows - 1))
>          return;
>
> -    PicYuv *reconPic = m_frame->m_reconPic;
> +    PicYuv *reconPic = m_frameFilter->m_frame->m_reconPic;
>      const uint32_t lineStartCUAddr = m_rowAddr + col;
>      const int realH = getCUHeight();
> -    const int realW = getCUWidth(col);
> +    const int realW = m_frameFilter->getCUWidth(col);
>
>      const uint32_t lumaMarginX = reconPic->m_lumaMarginX;
>      const uint32_t lumaMarginY = reconPic->m_lumaMarginY;
> @@ -248,17 +244,17 @@
>      const intptr_t strideC = reconPic->m_strideC;
>      pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr);
>      // // MUST BE check I400 since m_picOrg uninitialize in that case
> -    pixel *pixU = (m_param->internalCsp != X265_CSP_I400) ?
> reconPic->getCbAddr(lineStartCUAddr) : NULL;
> -    pixel *pixV = (m_param->internalCsp != X265_CSP_I400) ?
> reconPic->getCrAddr(lineStartCUAddr) : NULL;
> +    pixel *pixU = (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
> ? reconPic->getCbAddr(lineStartCUAddr) : NULL;
> +    pixel *pixV = (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
> ? reconPic->getCrAddr(lineStartCUAddr) : NULL;
>      int copySizeY = realW;
>      int copySizeC = (realW >> hChromaShift);
>
> -    if ((col == 0) | (col == m_numCols - 1))
> +    if ((col == 0) | (col == m_frameFilter->m_numCols - 1))
>      {
>          // TODO: improve by process on Left or Right only
>          primitives.extendRowBorder(reconPic->getLumaAddr(m_rowAddr),
> stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
>
> -        if (m_param->internalCsp != X265_CSP_I400)
> +        if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
>          {
>              primitives.extendRowBorder(reconPic->getCbAddr(m_rowAddr),
> strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift,
> reconPic->m_chromaMarginX);
>              primitives.extendRowBorder(reconPic->getCrAddr(m_rowAddr),
> strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift,
> reconPic->m_chromaMarginX);
> @@ -266,7 +262,7 @@
>      }
>
>      // Extra Left and Right border on first and last CU
> -    if ((col == 0) | (col == m_numCols - 1))
> +    if ((col == 0) | (col == m_frameFilter->m_numCols - 1))
>      {
>          copySizeY += lumaMarginX;
>          copySizeC += chromaMarginX;
> @@ -286,7 +282,7 @@
>          for (uint32_t y = 0; y < lumaMarginY; y++)
>              memcpy(pixY - (y + 1) * stride, pixY, copySizeY *
> sizeof(pixel));
>
> -        if (m_param->internalCsp != X265_CSP_I400)
> +        if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
>          {
>              for (uint32_t y = 0; y < chromaMarginY; y++)
>              {
> @@ -297,7 +293,7 @@
>      }
>
>      // Border extend Bottom
> -    if (m_row == m_numRows - 1)
> +    if (m_row == m_frameFilter->m_numRows - 1)
>      {
>          pixY += (realH - 1) * stride;
>          pixU += ((realH >> vChromaShift) - 1) * strideC;
> @@ -305,7 +301,7 @@
>          for (uint32_t y = 0; y < lumaMarginY; y++)
>              memcpy(pixY + (y + 1) * stride, pixY, copySizeY *
> sizeof(pixel));
>
> -        if (m_param->internalCsp != X265_CSP_I400)
> +        if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
>          {
>              for (uint32_t y = 0; y < chromaMarginY; y++)
>              {
> @@ -320,12 +316,13 @@
>  void FrameFilter::ParallelFilter::processTasks(int /*workerThreadId*/)
>  {
>      SAOParam* saoParam = m_encData->m_saoParam;
> -    const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
> -    const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
> +    const CUGeom* cuGeoms = m_frameFilter->m_frameEncoder->m_cuGeoms;
> +    const uint32_t* ctuGeomMap =
> m_frameFilter->m_frameEncoder->m_ctuGeomMap;
>      PicYuv* reconPic = m_encData->m_reconPic;
>      const int colStart = m_lastCol.get();
>      // TODO: Waiting previous row finish or simple clip on it?
>      const int colEnd = m_allowedCol.get();
> +    const int numCols = m_frameFilter->m_numCols;
>
>      // Avoid threading conflict
>      if (colStart >= colEnd)
> @@ -335,7 +332,7 @@
>      {
>          const uint32_t cuAddr = m_rowAddr + col;
>
> -        if (m_param->bEnableLoopFilter)
> +        if (m_frameFilter->m_param->bEnableLoopFilter)
>          {
>              const CUData* ctu = m_encData->getPicCTU(cuAddr);
>              deblockCTU(ctu, cuGeoms[ctuGeomMap[cuAddr]],
> Deblock::EDGE_VER);
> @@ -343,17 +340,17 @@
>
>          if (col >= 1)
>          {
> -            if (m_param->bEnableLoopFilter)
> +            if (m_frameFilter->m_param->bEnableLoopFilter)
>              {
>                  const CUData* ctuPrev = m_encData->getPicCTU(cuAddr - 1);
>                  deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]],
> Deblock::EDGE_HOR);
>
>                  // When SAO Disable, setting column counter here
> -                if ((!m_param->bEnableSAO) & (m_row >= 1))
> +                if ((!m_frameFilter->m_param->bEnableSAO) & (m_row >= 1))
>                      m_prevRow->processPostCu(col - 1);
>              }
>
> -            if (m_param->bEnableSAO)
> +            if (m_frameFilter->m_param->bEnableSAO)
>              {
>                  // Save SAO bottom row reference pixels
>                  copySaoAboveRef(reconPic, cuAddr - 1, col - 1);
> @@ -381,58 +378,58 @@
>          m_lastCol.incr();
>      }
>
> -    if (colEnd == (int)m_numCols)
> +    if (colEnd == numCols)
>      {
> -        const uint32_t cuAddr = m_rowAddr + m_numCols - 1;
> +        const uint32_t cuAddr = m_rowAddr + numCols - 1;
>
> -        if (m_param->bEnableLoopFilter)
> +        if (m_frameFilter->m_param->bEnableLoopFilter)
>          {
>              const CUData* ctuPrev = m_encData->getPicCTU(cuAddr);
>              deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr]],
> Deblock::EDGE_HOR);
>
>              // When SAO Disable, setting column counter here
> -            if ((!m_param->bEnableSAO) & (m_row >= 1))
> -                m_prevRow->processPostCu(m_numCols - 1);
> +            if ((!m_frameFilter->m_param->bEnableSAO) & (m_row >= 1))
> +                m_prevRow->processPostCu(numCols - 1);
>          }
>
>          // TODO: move processPostCu() into processSaoUnitCu()
> -        if (m_param->bEnableSAO)
> +        if (m_frameFilter->m_param->bEnableSAO)
>          {
>              // Save SAO bottom row reference pixels
> -            copySaoAboveRef(reconPic, cuAddr, m_numCols - 1);
> +            copySaoAboveRef(reconPic, cuAddr, numCols - 1);
>
>              // SAO Decide
>              // NOTE: reduce condition check for 1 CU only video, Why
> someone play with it?
> -            if (m_numCols >= 2)
> -                m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, m_numCols - 2,
> cuAddr - 1);
> +            if (numCols >= 2)
> +                m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, numCols - 2,
> cuAddr - 1);
>
> -            if (m_numCols >= 1)
> -                m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, m_numCols - 1,
> cuAddr);
> +            if (numCols >= 1)
> +                m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, numCols - 1,
> cuAddr);
>
>              // Process Previous Rows SAO CU
> -            if (m_row >= 1 && m_numCols >= 3)
> +            if (m_row >= 1 && numCols >= 3)
>              {
> -                m_prevRow->processSaoUnitCu(saoParam, m_numCols - 3);
> -                m_prevRow->processPostCu(m_numCols - 3);
> +                m_prevRow->processSaoUnitCu(saoParam, numCols - 3);
> +                m_prevRow->processPostCu(numCols - 3);
>              }
>
> -            if (m_row >= 1 && m_numCols >= 2)
> +            if (m_row >= 1 && numCols >= 2)
>              {
> -                m_prevRow->processSaoUnitCu(saoParam, m_numCols - 2);
> -                m_prevRow->processPostCu(m_numCols - 2);
> +                m_prevRow->processSaoUnitCu(saoParam, numCols - 2);
> +                m_prevRow->processPostCu(numCols - 2);
>              }
>
> -            if (m_row >= 1 && m_numCols >= 1)
> +            if (m_row >= 1 && numCols >= 1)
>              {
> -                m_prevRow->processSaoUnitCu(saoParam, m_numCols - 1);
> -                m_prevRow->processPostCu(m_numCols - 1);
> +                m_prevRow->processSaoUnitCu(saoParam, numCols - 1);
> +                m_prevRow->processPostCu(numCols - 1);
>              }
>
>              // Setting column sync counter
>              if (m_row >= 1)
> -                m_frame->m_reconColCount[m_row - 1].set(m_numCols - 1);
> +                m_frameFilter->m_frame->m_reconColCount[m_row -
> 1].set(numCols - 1);
>          }
> -        m_lastDeblocked.set(m_numCols);
> +        m_lastDeblocked.set(numCols);
>      }
>  }
>
> @@ -461,21 +458,21 @@
>          m_parallelFilter[row].waitForExit();
>
>          /* Check to avoid previous row process slower than current row */
> -        X265_CHECK((row < 1) || m_parallelFilter[row -
> 1].m_lastDeblocked.get() == (int)m_parallelFilter[row - 1].m_numCols,
> "previous row not finish");
> +        X265_CHECK((row < 1) || m_parallelFilter[row -
> 1].m_lastDeblocked.get() == m_numCols, "previous row not finish");
>
> -
> m_parallelFilter[row].m_allowedCol.set(m_parallelFilter[row].m_numCols);
> +        m_parallelFilter[row].m_allowedCol.set(m_numCols);
>          m_parallelFilter[row].processTasks(-1);
>
>          if (row == m_numRows - 1)
>          {
>              /* TODO: Early start last row */
> -            if ((row >= 1) && (m_parallelFilter[row -
> 1].m_lastDeblocked.get() != (int)m_parallelFilter[row - 1].m_numCols))
> +            if ((row >= 1) && (m_parallelFilter[row -
> 1].m_lastDeblocked.get() != m_numCols))
>                  x265_log(m_param, X265_LOG_WARNING, "detected
> ParallelFilter race condition on last row\n");
>
>              /* Apply SAO on last row of CUs, because we always apply SAO
> on row[X-1] */
>              if (m_param->bEnableSAO)
>              {
> -                for(uint32_t col = 0; col <
> m_parallelFilter[row].m_numCols; col++)
> +                for(int col = 0; col < m_numCols; col++)
>                  {
>                      // NOTE: must use processSaoUnitCu(), it include
> TQBypass logic
>                      m_parallelFilter[row].processSaoUnitCu(saoParam, col);
> @@ -483,7 +480,7 @@
>              }
>
>              // Process border extension on last row
> -            for(uint32_t col = 0; col < m_parallelFilter[row].m_numCols;
> col++)
> +            for(int col = 0; col < m_numCols; col++)
>              {
>                  // m_reconColCount will be set in processPostCu()
>                  m_parallelFilter[row].processPostCu(col);
> diff -r 765864c3c6f0 -r 08dadacfe2cd source/encoder/framefilter.h
> --- a/source/encoder/framefilter.h      Tue Jan 19 19:28:58 2016 +0800
> +++ b/source/encoder/framefilter.h      Tue Jan 19 19:58:01 2016 +0800
> @@ -52,8 +52,10 @@
>      int           m_pad[2];
>
>      int           m_numRows;
> +    int           m_numCols;
>      int           m_saoRowDelay;
>      int           m_lastHeight;
> +    int           m_lastWidth;
>
>      void*         m_ssimBuf;        /* Temp storage for ssim computation
> */
>
> @@ -61,15 +63,10 @@
>      class ParallelFilter : public BondedTaskGroup, public Deblock
>      {
>      public:
> -        uint32_t            m_numCols;
> -        uint32_t            m_numRows;
>          uint32_t            m_rowHeight;
> -        uint32_t            m_lastWidth;
> -        uint32_t            m_row;
> +        int                 m_row;
>          uint32_t            m_rowAddr;
> -        x265_param*         m_param;
> -        Frame*              m_frame;
> -        FrameEncoder*       m_frameEncoder;
> +        FrameFilter*        m_frameFilter;
>          FrameData*          m_encData;
>          ParallelFilter*     m_prevRow;
>          SAO                 m_sao;
> @@ -78,15 +75,10 @@
>          ThreadSafeInteger   m_lastDeblocked;   /* The column that
> finished all of Deblock stages  */
>
>          ParallelFilter()
> -            : m_numCols(0)
> -            , m_numRows(0)
> -            , m_rowHeight(0)
> -            , m_lastWidth(0)
> +            : m_rowHeight(0)
>              , m_row(0)
>              , m_rowAddr(0)
> -            , m_param(NULL)
> -            , m_frame(NULL)
> -            , m_frameEncoder(NULL)
> +            , m_frameFilter(NULL)
>              , m_encData(NULL)
>              , m_prevRow(NULL)
>          {
> @@ -104,18 +96,13 @@
>          void copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col);
>
>          // Post-Process (Border extension)
> -        void processPostCu(uint32_t col) const;
> +        void processPostCu(int col) const;
>
>          uint32_t getCUHeight() const
>          {
>              return m_rowHeight;
>          }
>
> -        uint32_t getCUWidth(int colNum) const
> -        {
> -            return (colNum == (int)m_numCols - 1) ? m_lastWidth :
> g_maxCUSize;
> -        }
> -
>      protected:
>
>          ParallelFilter operator=(const ParallelFilter&);
> @@ -132,6 +119,11 @@
>      {
>      }
>
> +    uint32_t getCUWidth(int colNum) const
> +    {
> +        return (colNum == (int)m_numCols - 1) ? m_lastWidth : g_maxCUSize;
> +    }
> +
>      void init(Encoder *top, FrameEncoder *frame, int numRows, uint32_t
> numCols);
>      void destroy();
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>



-- 
Deepthi Nandakumar
Engineering Manager, x265
Multicoreware, Inc
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160120/c57f7700/attachment-0001.html>


More information about the x265-devel mailing list