[x265] [PATCH] Changes for loadCTUData
Deepthi Nandakumar
deepthi at multicorewareinc.com
Mon Sep 29 12:41:48 CEST 2014
Ashok/Santhoshini - please review. Does removing the CU offsets affect any
planned optimizations?
On Sat, Sep 27, 2014 at 7:03 AM, <dtyx265 at gmail.com> wrote:
> # HG changeset patch
> # User David T Yuen <dtyx265 at gmail.com>
> # Date 1411781537 25200
> # Node ID 85098db291ae133981419868685358227b8b1437
> # Parent 4b18a27b52ac69a16805c2b455d4f891cdd4a057
> Changes for loadCTUData
>
> Replaced getDepthScanIdx() with table g_depthScanIdx
> Moved Analysis::loadCTUData to TComDataCU::loadCTUData since it only works
> with TComDataCU fields
> Replaced CU.offsets[2] with local variables in loadCTUData since that is
> the only place it was set and used
> minor changes to reduce the number of local variables in loadCTUData
>
> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComDataCU.cpp
> --- a/source/Lib/TLibCommon/TComDataCU.cpp Fri Sep 26 10:48:07 2014
> +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.cpp Fri Sep 26 18:32:17 2014
> -0700
> @@ -2407,4 +2407,43 @@
> result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
> }
>
> +void TComDataCU::loadCTUData(uint32_t maxCUSize)
> +{
> + // Initialize the coding blocks inside the CTB
> + for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0;
> log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--)
> + {
> + uint32_t blockSize = 1 << log2CUSize;
> + uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize);
> + int32_t last_level_flag = log2CUSize == MIN_LOG2_CU_SIZE;
> + for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++)
> + {
> + for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++)
> + {
> + uint32_t depth_idx = g_depthScanIdx[sb_y][sb_x];
> + uint32_t cuIdx = rangeCUIdx + depth_idx;
> + uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth +
> (depth_idx << 2);
> + uint32_t px = m_cuPelX + sb_x * blockSize;
> + uint32_t py = m_cuPelY + sb_y * blockSize;
> + int32_t present_flag = px <
> m_pic->m_origPicYuv->m_picWidth && py < m_pic->m_origPicYuv->m_picHeight;
> + int32_t split_mandatory_flag = present_flag &&
> !last_level_flag && (px + blockSize > m_pic->m_origPicYuv->m_picWidth || py
> + blockSize > m_pic->m_origPicYuv->m_picHeight);
> +
> + /* Offset of the luma CU in the X, Y direction in terms
> of pixels from the CTU origin */
> + uint32_t xOffset = (sb_x * blockSize) >> 3;
> + uint32_t yOffset = (sb_y * blockSize) >> 3;
> +
> + CU *cu = m_CULocalData + cuIdx;
> + cu->log2CUSize = log2CUSize;
> + cu->childIdx = child_idx;
> + cu->encodeIdx = g_depthScanIdx[yOffset][xOffset];
> + cu->flags = 0;
> +
> + CU_SET_FLAG(cu->flags, CU::PRESENT, present_flag);
> + CU_SET_FLAG(cu->flags, CU::SPLIT_MANDATORY | CU::SPLIT,
> split_mandatory_flag);
> + CU_SET_FLAG(cu->flags, CU::LEAF, last_level_flag);
> + }
> + }
> + rangeCUIdx += sbWidth * sbWidth;
> + }
> +}
> +
> //! \}
> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComDataCU.h
> --- a/source/Lib/TLibCommon/TComDataCU.h Fri Sep 26 10:48:07 2014
> +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.h Fri Sep 26 18:32:17 2014
> -0700
> @@ -114,7 +114,6 @@
> uint32_t log2CUSize; // Log of the CU size.
> uint32_t childIdx; // Index of the first child CU
> uint32_t encodeIdx; // Encoding index of this CU in terms of 8x8
> blocks.
> - uint32_t offset[2]; // Offset of the luma CU in the X, Y direction
> in terms of pixels from the CTU origin
> uint32_t flags; // CU flags.
> };
>
> @@ -274,6 +273,7 @@
> void initCU(Frame* pic, uint32_t cuAddr);
> void initEstData();
> void initSubCU(TComDataCU* cu, uint32_t partUnitIdx,
> uint32_t depth, int qp);
> + void loadCTUData(uint32_t maxCUSize);
>
> void copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx,
> uint32_t depth);
> void copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx,
> uint32_t depth, bool isRDObasedAnalysis = true);
> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComRom.cpp
> --- a/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 10:48:07 2014 +0530
> +++ b/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 18:32:17 2014 -0700
> @@ -517,5 +517,18 @@
> {256, 64, 16, 4}
> };
>
> +/* g_depthScanIdx [y][x] */
> +const uint32_t g_depthScanIdx[8][8] =
> +{
> + { 0, 1, 4, 5, 16, 17, 20, 21, },
> + { 2, 3, 6, 7, 18, 19, 22, 23, },
> + { 8, 9, 12, 13, 24, 25, 28, 29, },
> + { 10, 11, 14, 15, 26, 27, 30, 31, },
> + { 32, 33, 36, 37, 48, 49, 52, 53, },
> + { 34, 35, 38, 39, 50, 51, 54, 55, },
> + { 40, 41, 44, 45, 56, 57, 60, 61, },
> + { 42, 43, 46, 47, 58, 59, 62, 63, }
> +};
> +
> }
> //! \}
> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComRom.h
> --- a/source/Lib/TLibCommon/TComRom.h Fri Sep 26 10:48:07 2014 +0530
> +++ b/source/Lib/TLibCommon/TComRom.h Fri Sep 26 18:32:17 2014 -0700
> @@ -159,6 +159,8 @@
>
> extern const uint32_t g_depthInc[3][4];
>
> +extern const uint32_t g_depthScanIdx[8][8];
> +
> }
>
> #endif //ifndef X265_TCOMROM_H
> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Fri Sep 26 10:48:07 2014 +0530
> +++ b/source/encoder/analysis.cpp Fri Sep 26 18:32:17 2014 -0700
> @@ -30,32 +30,6 @@
>
> using namespace x265;
>
> -namespace {
> -// TO DO: Remove this function with a table.
> -int getDepthScanIdx(int x, int y, int size)
> -{
> - if (size == 1)
> - return 0;
> -
> - int depth = 0;
> - int h = size >> 1;
> -
> - if (x >= h)
> - {
> - x -= h;
> - depth += h * h;
> - }
> -
> - if (y >= h)
> - {
> - y -= h;
> - depth += 2 * h * h;
> - }
> -
> - return depth + getDepthScanIdx(x, y, h);
> -}
> -}
> -
> Analysis::Analysis()
> {
> m_bestPredYuv = NULL;
> @@ -253,47 +227,6 @@
> delete [] m_origYuv;
> }
>
> -void Analysis::loadCTUData(TComDataCU* parentCU)
> -{
> - uint8_t cuRange[2]= {MIN_LOG2_CU_SIZE,
> g_log2Size[m_param->maxCUSize]};
> -
> - // Initialize the coding blocks inside the CTB
> - for (int rangeIdx = cuRange[1], rangeCUIdx = 0; rangeIdx >=
> cuRange[0]; rangeIdx--)
> - {
> - uint32_t log2CUSize = rangeIdx;
> - int32_t blockSize = 1 << log2CUSize;
> - uint32_t b8Width = 1 << (cuRange[1] - 3);
> - uint32_t sbWidth = 1 << (cuRange[1] - rangeIdx);
> - int32_t last_level_flag = rangeIdx == cuRange[0];
> - for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++)
> - {
> - for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++)
> - {
> - uint32_t depth_idx = getDepthScanIdx(sb_x, sb_y, sbWidth);
> - uint32_t cuIdx = rangeCUIdx + depth_idx;
> - uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth +
> (depth_idx << 2);
> - int32_t px = parentCU->getCUPelX() + sb_x * blockSize;
> - int32_t py = parentCU->getCUPelY() + sb_y * blockSize;
> - int32_t present_flag = px <
> parentCU->m_pic->m_origPicYuv->m_picWidth && py <
> parentCU->m_pic->m_origPicYuv->m_picHeight;
> - int32_t split_mandatory_flag = present_flag &&
> !last_level_flag && (px + blockSize >
> parentCU->m_pic->m_origPicYuv->m_picWidth || py + blockSize >
> parentCU->m_pic->m_origPicYuv->m_picHeight);
> -
> - CU *cu = parentCU->m_CULocalData + cuIdx;
> - cu->log2CUSize = log2CUSize;
> - cu->childIdx = child_idx;
> - cu->offset[0] = sb_x * blockSize;
> - cu->offset[1] = sb_y * blockSize;
> - cu->encodeIdx = getDepthScanIdx(cu->offset[0] >> 3,
> cu->offset[1] >> 3, b8Width);
> - cu->flags = 0;
> -
> - CU_SET_FLAG(cu->flags, CU::PRESENT, present_flag);
> - CU_SET_FLAG(cu->flags, CU::SPLIT_MANDATORY | CU::SPLIT,
> split_mandatory_flag);
> - CU_SET_FLAG(cu->flags, CU::LEAF, last_level_flag);
> - }
> - }
> - rangeCUIdx += sbWidth * sbWidth;
> - }
> -}
> -
> void Analysis::compressCU(TComDataCU* cu)
> {
> Frame* pic = cu->m_pic;
> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/analysis.h
> --- a/source/encoder/analysis.h Fri Sep 26 10:48:07 2014 +0530
> +++ b/source/encoder/analysis.h Fri Sep 26 18:32:17 2014 -0700
> @@ -104,7 +104,6 @@
> bool create(uint32_t totalDepth, uint32_t maxWidth);
> void destroy();
> void compressCU(TComDataCU* cu);
> - void loadCTUData(TComDataCU* cu);
>
> protected:
>
> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Fri Sep 26 10:48:07 2014 +0530
> +++ b/source/encoder/frameencoder.cpp Fri Sep 26 18:32:17 2014 -0700
> @@ -686,7 +686,7 @@
> // load current best state from go-on entropy coder
> curRow.rdEntropyCoders[0][CI_CURR_BEST].load(rowCoder);
>
> - tld.analysis.loadCTUData(cu);
> + cu->loadCTUData(m_param->maxCUSize);
> tld.analysis.m_quant.setQPforQuant(cu);
> tld.analysis.compressCU(cu); // Does all the CU analysis
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140929/38843608/attachment.html>
More information about the x265-devel
mailing list