[x265] [PATCH] Changes for loadCTUData

Deepthi Nandakumar deepthi at multicorewareinc.com
Mon Sep 29 12:41:48 CEST 2014


Ashok/Santhoshini - please review. Does removing the offset[2] field from the
CU struct affect any planned optimizations?

On Sat, Sep 27, 2014 at 7:03 AM, <dtyx265 at gmail.com> wrote:

> # HG changeset patch
> # User David T Yuen <dtyx265 at gmail.com>
> # Date 1411781537 25200
> # Node ID 85098db291ae133981419868685358227b8b1437
> # Parent  4b18a27b52ac69a16805c2b455d4f891cdd4a057
> Changes for loadCTUData
>
> Replaced getDepthScanIdx() with table g_depthScanIdx
> Moved Analysis::loadCTUData to TComDataCU::loadCTUData since it only works
> with TComDataCU fields
> Replaced CU.offset[2] with local variables in loadCTUData since that is
> the only place it was set and used
> Minor changes to reduce the number of local variables in loadCTUData
>
> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComDataCU.cpp
> --- a/source/Lib/TLibCommon/TComDataCU.cpp      Fri Sep 26 10:48:07 2014
> +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.cpp      Fri Sep 26 18:32:17 2014
> -0700
> @@ -2407,4 +2407,43 @@
>          result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
>  }
>
> +void TComDataCU::loadCTUData(uint32_t maxCUSize)
> +{
> +    // Initialize the coding blocks inside the CTB
> +    for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0;
> log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--)
> +    {
> +        uint32_t blockSize  = 1 << log2CUSize;
> +        uint32_t sbWidth    = 1 << (g_log2Size[maxCUSize] - log2CUSize);
> +        int32_t last_level_flag = log2CUSize == MIN_LOG2_CU_SIZE;
> +        for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++)
> +        {
> +            for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++)
> +            {
> +                uint32_t depth_idx = g_depthScanIdx[sb_y][sb_x];
> +                uint32_t cuIdx = rangeCUIdx + depth_idx;
> +                uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth +
> (depth_idx << 2);
> +                uint32_t px = m_cuPelX + sb_x * blockSize;
> +                uint32_t py = m_cuPelY + sb_y * blockSize;
> +                int32_t present_flag = px <
> m_pic->m_origPicYuv->m_picWidth && py < m_pic->m_origPicYuv->m_picHeight;
> +                int32_t split_mandatory_flag = present_flag &&
> !last_level_flag && (px + blockSize > m_pic->m_origPicYuv->m_picWidth || py
> + blockSize > m_pic->m_origPicYuv->m_picHeight);
> +
> +                /* Offset of the luma CU in the X, Y direction in terms
> of 8x8 blocks from the CTU origin */
> +                uint32_t xOffset = (sb_x * blockSize) >> 3;
> +                uint32_t yOffset = (sb_y * blockSize) >> 3;
> +
> +                CU *cu = m_CULocalData + cuIdx;
> +                cu->log2CUSize = log2CUSize;
> +                cu->childIdx = child_idx;
> +                cu->encodeIdx = g_depthScanIdx[yOffset][xOffset];
> +                cu->flags = 0;
> +
> +                CU_SET_FLAG(cu->flags, CU::PRESENT, present_flag);
> +                CU_SET_FLAG(cu->flags, CU::SPLIT_MANDATORY | CU::SPLIT,
> split_mandatory_flag);
> +                CU_SET_FLAG(cu->flags, CU::LEAF, last_level_flag);
> +            }
> +        }
> +        rangeCUIdx += sbWidth * sbWidth;
> +    }
> +}
> +
>  //! \}
> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComDataCU.h
> --- a/source/Lib/TLibCommon/TComDataCU.h        Fri Sep 26 10:48:07 2014
> +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.h        Fri Sep 26 18:32:17 2014
> -0700
> @@ -114,7 +114,6 @@
>      uint32_t log2CUSize; // Log of the CU size.
>      uint32_t childIdx;   // Index of the first child CU
>      uint32_t encodeIdx;  // Encoding index of this CU in terms of 8x8
> blocks.
> -    uint32_t offset[2];  // Offset of the luma CU in the X, Y direction
> in terms of pixels from the CTU origin
>      uint32_t flags;      // CU flags.
>  };
>
> @@ -274,6 +273,7 @@
>      void          initCU(Frame* pic, uint32_t cuAddr);
>      void          initEstData();
>      void          initSubCU(TComDataCU* cu, uint32_t partUnitIdx,
> uint32_t depth, int qp);
> +    void          loadCTUData(uint32_t maxCUSize);
>
>      void          copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx,
> uint32_t depth);
>      void          copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx,
> uint32_t depth, bool isRDObasedAnalysis = true);
> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComRom.cpp
> --- a/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 10:48:07 2014 +0530
> +++ b/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 18:32:17 2014 -0700
> @@ -517,5 +517,18 @@
>      {256, 64, 16, 4}
>  };
>
> +/* g_depthScanIdx [y][x] */
> +const uint32_t g_depthScanIdx[8][8] =
> +{
> +    {   0,   1,   4,   5,  16,  17,  20,  21,  },
> +    {   2,   3,   6,   7,  18,  19,  22,  23,  },
> +    {   8,   9,  12,  13,  24,  25,  28,  29,  },
> +    {  10,  11,  14,  15,  26,  27,  30,  31,  },
> +    {  32,  33,  36,  37,  48,  49,  52,  53,  },
> +    {  34,  35,  38,  39,  50,  51,  54,  55,  },
> +    {  40,  41,  44,  45,  56,  57,  60,  61,  },
> +    {  42,  43,  46,  47,  58,  59,  62,  63,  }
> +};
> +
>  }
>  //! \}
> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComRom.h
> --- a/source/Lib/TLibCommon/TComRom.h   Fri Sep 26 10:48:07 2014 +0530
> +++ b/source/Lib/TLibCommon/TComRom.h   Fri Sep 26 18:32:17 2014 -0700
> @@ -159,6 +159,8 @@
>
>  extern const uint32_t g_depthInc[3][4];
>
> +extern const uint32_t g_depthScanIdx[8][8];
> +
>  }
>
>  #endif  //ifndef X265_TCOMROM_H
> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Fri Sep 26 10:48:07 2014 +0530
> +++ b/source/encoder/analysis.cpp       Fri Sep 26 18:32:17 2014 -0700
> @@ -30,32 +30,6 @@
>
>  using namespace x265;
>
> -namespace {
> -// TO DO: Remove this function with a table.
> -int getDepthScanIdx(int x, int y, int size)
> -{
> -    if (size == 1)
> -        return 0;
> -
> -    int depth = 0;
> -    int h = size >> 1;
> -
> -    if (x >= h)
> -    {
> -        x -= h;
> -        depth += h * h;
> -    }
> -
> -    if (y >= h)
> -    {
> -        y -= h;
> -        depth += 2 * h * h;
> -    }
> -
> -    return depth + getDepthScanIdx(x, y, h);
> -}
> -}
> -
>  Analysis::Analysis()
>  {
>      m_bestPredYuv     = NULL;
> @@ -253,47 +227,6 @@
>      delete [] m_origYuv;
>  }
>
> -void Analysis::loadCTUData(TComDataCU* parentCU)
> -{
> -    uint8_t cuRange[2]= {MIN_LOG2_CU_SIZE,
> g_log2Size[m_param->maxCUSize]};
> -
> -    // Initialize the coding blocks inside the CTB
> -    for (int rangeIdx = cuRange[1], rangeCUIdx = 0; rangeIdx >=
> cuRange[0]; rangeIdx--)
> -    {
> -        uint32_t log2CUSize = rangeIdx;
> -        int32_t  blockSize  = 1 << log2CUSize;
> -        uint32_t b8Width    = 1 << (cuRange[1] - 3);
> -        uint32_t sbWidth    = 1 << (cuRange[1] - rangeIdx);
> -        int32_t last_level_flag = rangeIdx == cuRange[0];
> -        for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++)
> -        {
> -            for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++)
> -            {
> -                uint32_t depth_idx = getDepthScanIdx(sb_x, sb_y, sbWidth);
> -                uint32_t cuIdx = rangeCUIdx + depth_idx;
> -                uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth +
> (depth_idx << 2);
> -                int32_t px = parentCU->getCUPelX() + sb_x * blockSize;
> -                int32_t py = parentCU->getCUPelY() + sb_y * blockSize;
> -                int32_t present_flag = px <
> parentCU->m_pic->m_origPicYuv->m_picWidth && py <
> parentCU->m_pic->m_origPicYuv->m_picHeight;
> -                int32_t split_mandatory_flag = present_flag &&
> !last_level_flag && (px + blockSize >
> parentCU->m_pic->m_origPicYuv->m_picWidth || py + blockSize >
> parentCU->m_pic->m_origPicYuv->m_picHeight);
> -
> -                CU *cu = parentCU->m_CULocalData + cuIdx;
> -                cu->log2CUSize = log2CUSize;
> -                cu->childIdx = child_idx;
> -                cu->offset[0] = sb_x * blockSize;
> -                cu->offset[1] = sb_y * blockSize;
> -                cu->encodeIdx = getDepthScanIdx(cu->offset[0] >> 3,
> cu->offset[1] >> 3, b8Width);
> -                cu->flags = 0;
> -
> -                CU_SET_FLAG(cu->flags, CU::PRESENT, present_flag);
> -                CU_SET_FLAG(cu->flags, CU::SPLIT_MANDATORY | CU::SPLIT,
> split_mandatory_flag);
> -                CU_SET_FLAG(cu->flags, CU::LEAF, last_level_flag);
> -            }
> -        }
> -        rangeCUIdx += sbWidth * sbWidth;
> -    }
> -}
> -
>  void Analysis::compressCU(TComDataCU* cu)
>  {
>      Frame* pic = cu->m_pic;
> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/analysis.h
> --- a/source/encoder/analysis.h Fri Sep 26 10:48:07 2014 +0530
> +++ b/source/encoder/analysis.h Fri Sep 26 18:32:17 2014 -0700
> @@ -104,7 +104,6 @@
>      bool create(uint32_t totalDepth, uint32_t maxWidth);
>      void destroy();
>      void compressCU(TComDataCU* cu);
> -    void loadCTUData(TComDataCU* cu);
>
>  protected:
>
> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp   Fri Sep 26 10:48:07 2014 +0530
> +++ b/source/encoder/frameencoder.cpp   Fri Sep 26 18:32:17 2014 -0700
> @@ -686,7 +686,7 @@
>              // load current best state from go-on entropy coder
>              curRow.rdEntropyCoders[0][CI_CURR_BEST].load(rowCoder);
>
> -        tld.analysis.loadCTUData(cu);
> +        cu->loadCTUData(m_param->maxCUSize);
>          tld.analysis.m_quant.setQPforQuant(cu);
>          tld.analysis.compressCU(cu); // Does all the CU analysis
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140929/38843608/attachment.html>


More information about the x265-devel mailing list