[x265] [PATCH] Changes for loadCTUData

Mon Sep 29 13:24:01 CEST 2014

As of now, our plan does not make use of the offset value. We can add it
back later if we need it.

On Mon, Sep 29, 2014 at 4:46 PM, Santhoshini Sekar <
santhoshini at multicorewareinc.com> wrote:

> As of now, offset can be removed from the CU structure. We don't have any
> planned optimization that uses it. If needed, we can
> add it later.
>
> On Mon, Sep 29, 2014 at 4:11 PM, Deepthi Nandakumar <
> deepthi at multicorewareinc.com> wrote:
>
>> Ashok/Santhoshini - please review. Does removing the offsets affect any
>> planned optimizations?
>>
>> On Sat, Sep 27, 2014 at 7:03 AM, <dtyx265 at gmail.com> wrote:
>>
>>> # HG changeset patch
>>> # User David T Yuen <dtyx265 at gmail.com>
>>> # Date 1411781537 25200
>>> # Node ID 85098db291ae133981419868685358227b8b1437
>>> # Parent  4b18a27b52ac69a16805c2b455d4f891cdd4a057
>>> Changes for loadCTUData
>>>
>>> Replaced getDepthScanIdx() with table g_depthScanIdx
>>> Moved Analysis::loadCTUData to TComDataCU::loadCTUData since it only
>>> works with TComDataCU fields
>>> Replaced CU.offsets[2] with local variables in loadCTUData since that is
>>> the only place it was set and used
>>> minor changes to reduce the number of local variables in loadCTUData
>>>
>>> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComDataCU.cpp
>>> --- a/source/Lib/TLibCommon/TComDataCU.cpp      Fri Sep 26 10:48:07 2014
>>> +0530
>>> +++ b/source/Lib/TLibCommon/TComDataCU.cpp      Fri Sep 26 18:32:17 2014
>>> -0700
>>> @@ -2407,4 +2407,43 @@
>>>          result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
>>>  }
>>>
>>> +void TComDataCU::loadCTUData(uint32_t maxCUSize)
>>> +{
>>> +    // Initialize the coding blocks inside the CTB
>>> +    for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0;
>>> log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--)
>>> +    {
>>> +        uint32_t blockSize  = 1 << log2CUSize;
>>> +        uint32_t sbWidth    = 1 << (g_log2Size[maxCUSize] - log2CUSize);
>>> +        int32_t last_level_flag = log2CUSize == MIN_LOG2_CU_SIZE;
>>> +        for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++)
>>> +        {
>>> +            for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++)
>>> +            {
>>> +                uint32_t depth_idx = g_depthScanIdx[sb_y][sb_x];
>>> +                uint32_t cuIdx = rangeCUIdx + depth_idx;
>>> +                uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth +
>>> (depth_idx << 2);
>>> +                uint32_t px = m_cuPelX + sb_x * blockSize;
>>> +                uint32_t py = m_cuPelY + sb_y * blockSize;
>>> +                int32_t present_flag = px <
>>> m_pic->m_origPicYuv->m_picWidth && py < m_pic->m_origPicYuv->m_picHeight;
>>> +                int32_t split_mandatory_flag = present_flag &&
>>> !last_level_flag && (px + blockSize > m_pic->m_origPicYuv->m_picWidth || py
>>> + blockSize > m_pic->m_origPicYuv->m_picHeight);
>>> +
>>> +                /* Offset of the luma CU in the X, Y direction in terms
>>> of pixels from the CTU origin */
>>> +                uint32_t xOffset = (sb_x * blockSize) >> 3;
>>> +                uint32_t yOffset = (sb_y * blockSize) >> 3;
>>> +
>>> +                CU *cu = m_CULocalData + cuIdx;
>>> +                cu->log2CUSize = log2CUSize;
>>> +                cu->childIdx = child_idx;
>>> +                cu->encodeIdx = g_depthScanIdx[yOffset][xOffset];
>>> +                cu->flags = 0;
>>> +
>>> +                CU_SET_FLAG(cu->flags, CU::PRESENT, present_flag);
>>> +                CU_SET_FLAG(cu->flags, CU::SPLIT_MANDATORY | CU::SPLIT,
>>> split_mandatory_flag);
>>> +                CU_SET_FLAG(cu->flags, CU::LEAF, last_level_flag);
>>> +            }
>>> +        }
>>> +        rangeCUIdx += sbWidth * sbWidth;
>>> +    }
>>> +}
>>> +
>>>  //! \}
>>> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComDataCU.h
>>> --- a/source/Lib/TLibCommon/TComDataCU.h        Fri Sep 26 10:48:07 2014
>>> +0530
>>> +++ b/source/Lib/TLibCommon/TComDataCU.h        Fri Sep 26 18:32:17 2014
>>> -0700
>>> @@ -114,7 +114,6 @@
>>>      uint32_t log2CUSize; // Log of the CU size.
>>>      uint32_t childIdx;   // Index of the first child CU
>>>      uint32_t encodeIdx;  // Encoding index of this CU in terms of 8x8
>>> blocks.
>>> -    uint32_t offset[2];  // Offset of the luma CU in the X, Y direction
>>> in terms of pixels from the CTU origin
>>>      uint32_t flags;      // CU flags.
>>>  };
>>>
>>> @@ -274,6 +273,7 @@
>>>      void          initCU(Frame* pic, uint32_t cuAddr);
>>>      void          initEstData();
>>>      void          initSubCU(TComDataCU* cu, uint32_t partUnitIdx,
>>> uint32_t depth, int qp);
>>> +    void          loadCTUData(uint32_t maxCUSize);
>>>
>>>      void          copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx,
>>> uint32_t depth);
>>>      void          copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx,
>>> uint32_t depth, bool isRDObasedAnalysis = true);
>>> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComRom.cpp
>>> --- a/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 10:48:07 2014 +0530
>>> +++ b/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 18:32:17 2014 -0700
>>> @@ -517,5 +517,18 @@
>>>      {256, 64, 16, 4}
>>>  };
>>>
>>> +/* g_depthScanIdx [y][x] */
>>> +const uint32_t g_depthScanIdx[8][8] =
>>> +{
>>> +    {   0,   1,   4,   5,  16,  17,  20,  21,  },
>>> +    {   2,   3,   6,   7,  18,  19,  22,  23,  },
>>> +    {   8,   9,  12,  13,  24,  25,  28,  29,  },
>>> +    {  10,  11,  14,  15,  26,  27,  30,  31,  },
>>> +    {  32,  33,  36,  37,  48,  49,  52,  53,  },
>>> +    {  34,  35,  38,  39,  50,  51,  54,  55,  },
>>> +    {  40,  41,  44,  45,  56,  57,  60,  61,  },
>>> +    {  42,  43,  46,  47,  58,  59,  62,  63,  }
>>> +};
>>> +
>>>  }
>>>  //! \}
>>> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComRom.h
>>> --- a/source/Lib/TLibCommon/TComRom.h   Fri Sep 26 10:48:07 2014 +0530
>>> +++ b/source/Lib/TLibCommon/TComRom.h   Fri Sep 26 18:32:17 2014 -0700
>>> @@ -159,6 +159,8 @@
>>>
>>>  extern const uint32_t g_depthInc[3][4];
>>>
>>> +extern const uint32_t g_depthScanIdx[8][8];
>>> +
>>>  }
>>>
>>>  #endif  //ifndef X265_TCOMROM_H
>>> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/analysis.cpp
>>> --- a/source/encoder/analysis.cpp       Fri Sep 26 10:48:07 2014 +0530
>>> +++ b/source/encoder/analysis.cpp       Fri Sep 26 18:32:17 2014 -0700
>>> @@ -30,32 +30,6 @@
>>>
>>>  using namespace x265;
>>>
>>> -namespace {
>>> -// TO DO: Remove this function with a table.
>>> -int getDepthScanIdx(int x, int y, int size)
>>> -{
>>> -    if (size == 1)
>>> -        return 0;
>>> -
>>> -    int depth = 0;
>>> -    int h = size >> 1;
>>> -
>>> -    if (x >= h)
>>> -    {
>>> -        x -= h;
>>> -        depth += h * h;
>>> -    }
>>> -
>>> -    if (y >= h)
>>> -    {
>>> -        y -= h;
>>> -        depth += 2 * h * h;
>>> -    }
>>> -
>>> -    return depth + getDepthScanIdx(x, y, h);
>>> -}
>>> -}
>>> -
>>>  Analysis::Analysis()
>>>  {
>>>      m_bestPredYuv     = NULL;
>>> @@ -253,47 +227,6 @@
>>>      delete [] m_origYuv;
>>>  }
>>>
>>> -void Analysis::loadCTUData(TComDataCU* parentCU)
>>> -{
>>> -    uint8_t cuRange[2]= {MIN_LOG2_CU_SIZE,
>>> g_log2Size[m_param->maxCUSize]};
>>> -
>>> -    // Initialize the coding blocks inside the CTB
>>> -    for (int rangeIdx = cuRange[1], rangeCUIdx = 0; rangeIdx >=
>>> cuRange[0]; rangeIdx--)
>>> -    {
>>> -        uint32_t log2CUSize = rangeIdx;
>>> -        int32_t  blockSize  = 1 << log2CUSize;
>>> -        uint32_t b8Width    = 1 << (cuRange[1] - 3);
>>> -        uint32_t sbWidth    = 1 << (cuRange[1] - rangeIdx);
>>> -        int32_t last_level_flag = rangeIdx == cuRange[0];
>>> -        for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++)
>>> -        {
>>> -            for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++)
>>> -            {
>>> -                uint32_t depth_idx = getDepthScanIdx(sb_x, sb_y,
>>> sbWidth);
>>> -                uint32_t cuIdx = rangeCUIdx + depth_idx;
>>> -                uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth +
>>> (depth_idx << 2);
>>> -                int32_t px = parentCU->getCUPelX() + sb_x * blockSize;
>>> -                int32_t py = parentCU->getCUPelY() + sb_y * blockSize;
>>> -                int32_t present_flag = px <
>>> parentCU->m_pic->m_origPicYuv->m_picWidth && py <
>>> parentCU->m_pic->m_origPicYuv->m_picHeight;
>>> -                int32_t split_mandatory_flag = present_flag &&
>>> !last_level_flag && (px + blockSize >
>>> parentCU->m_pic->m_origPicYuv->m_picWidth || py + blockSize >
>>> parentCU->m_pic->m_origPicYuv->m_picHeight);
>>> -
>>> -                CU *cu = parentCU->m_CULocalData + cuIdx;
>>> -                cu->log2CUSize = log2CUSize;
>>> -                cu->childIdx = child_idx;
>>> -                cu->offset[0] = sb_x * blockSize;
>>> -                cu->offset[1] = sb_y * blockSize;
>>> -                cu->encodeIdx = getDepthScanIdx(cu->offset[0] >> 3,
>>> cu->offset[1] >> 3, b8Width);
>>> -                cu->flags = 0;
>>> -
>>> -                CU_SET_FLAG(cu->flags, CU::PRESENT, present_flag);
>>> -                CU_SET_FLAG(cu->flags, CU::SPLIT_MANDATORY | CU::SPLIT,
>>> split_mandatory_flag);
>>> -                CU_SET_FLAG(cu->flags, CU::LEAF, last_level_flag);
>>> -            }
>>> -        }
>>> -        rangeCUIdx += sbWidth * sbWidth;
>>> -    }
>>> -}
>>> -
>>>  void Analysis::compressCU(TComDataCU* cu)
>>>  {
>>>      Frame* pic = cu->m_pic;
>>> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/analysis.h
>>> --- a/source/encoder/analysis.h Fri Sep 26 10:48:07 2014 +0530
>>> +++ b/source/encoder/analysis.h Fri Sep 26 18:32:17 2014 -0700
>>> @@ -104,7 +104,6 @@
>>>      bool create(uint32_t totalDepth, uint32_t maxWidth);
>>>      void destroy();
>>>      void compressCU(TComDataCU* cu);
>>> -    void loadCTUData(TComDataCU* cu);
>>>
>>>  protected:
>>>
>>> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/frameencoder.cpp
>>> --- a/source/encoder/frameencoder.cpp   Fri Sep 26 10:48:07 2014 +0530
>>> +++ b/source/encoder/frameencoder.cpp   Fri Sep 26 18:32:17 2014 -0700
>>> @@ -686,7 +686,7 @@
>>>              // load current best state from go-on entropy coder
>>>              curRow.rdEntropyCoders[0][CI_CURR_BEST].load(rowCoder);
>>>
>>> -        tld.analysis.loadCTUData(cu);
>>> +        cu->loadCTUData(m_param->maxCUSize);
>>>          tld.analysis.m_quant.setQPforQuant(cu);
>>>          tld.analysis.compressCU(cu); // Does all the CU analysis
>>>
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140929/ddc4b4be/attachment-0001.html>