[x265-commits] [x265] refine deblocking filter

Satoshi Nakagawa nakagawa424 at oki.com
Fri Sep 26 22:44:17 CEST 2014


details:   http://hg.videolan.org/x265/rev/b47d794a0372
branches:  
changeset: 8144:b47d794a0372
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Fri Sep 26 19:34:36 2014 +0900
description:
refine deblocking filter
Subject: [x265] remove getNumPartInCU() and replace it with macro

details:   http://hg.videolan.org/x265/rev/37f33ab176fa
branches:  
changeset: 8145:37f33ab176fa
user:      Santhoshini Sekar <santhoshini at multicorewareinc.com>
date:      Thu Sep 25 15:24:16 2014 +0530
description:
remove getNumPartInCU() and replace it with macro
Subject: [x265] asm: avx2 assembly code for idct8x8

details:   http://hg.videolan.org/x265/rev/8492a3250fef
branches:  
changeset: 8146:8492a3250fef
user:      Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
date:      Thu Sep 25 18:50:43 2014 +0530
description:
asm: avx2 assembly code for idct8x8
Subject: [x265] analysis: hoist local function into anonymous namespace (file local)

details:   http://hg.videolan.org/x265/rev/d26780e43a87
branches:  
changeset: 8147:d26780e43a87
user:      Steve Borho <steve at borho.org>
date:      Thu Sep 25 15:04:12 2014 -0500
description:
analysis: hoist local function into anonymous namespace (file local)
Subject: [x265] analysis: remove unused LAMBDA_PARTITION_SELECT

details:   http://hg.videolan.org/x265/rev/391282b02731
branches:  
changeset: 8148:391282b02731
user:      Steve Borho <steve at borho.org>
date:      Thu Sep 25 15:04:56 2014 -0500
description:
analysis: remove unused LAMBDA_PARTITION_SELECT
Subject: [x265] analysis: coding style and comment nits

details:   http://hg.videolan.org/x265/rev/e6cc918fb18e
branches:  
changeset: 8149:e6cc918fb18e
user:      Steve Borho <steve at borho.org>
date:      Thu Sep 25 15:07:07 2014 -0500
description:
analysis: coding style and comment nits
Subject: [x265] analysis: remove #define conditionals for control flow

details:   http://hg.videolan.org/x265/rev/f5f7c23fedd6
branches:  
changeset: 8150:f5f7c23fedd6
user:      Steve Borho <steve at borho.org>
date:      Thu Sep 25 15:09:01 2014 -0500
description:
analysis: remove #define conditionals for control flow

The non-default paths are not being tested (or even compiled) and are thus
assumed broken. The defines simply make the code harder to read.
Subject: [x265] analysis: more style nits, code simplifications. no behavior change

details:   http://hg.videolan.org/x265/rev/bd0e23d7d394
branches:  
changeset: 8151:bd0e23d7d394
user:      Steve Borho <steve at borho.org>
date:      Thu Sep 25 15:26:30 2014 -0500
description:
analysis: more style nits, code simplifications. no behavior change
Subject: [x265] Removed unnecessary call to loadCTUData

details:   http://hg.videolan.org/x265/rev/8119b3d8d260
branches:  
changeset: 8152:8119b3d8d260
user:      David T Yuen <dtyx265 at gmail.com>
date:      Thu Sep 25 16:43:05 2014 -0700
description:
Removed unnecessary call to loadCTUData
Subject: [x265] asm: avx2 assembly code for idct32x32

details:   http://hg.videolan.org/x265/rev/4b18a27b52ac
branches:  
changeset: 8153:4b18a27b52ac
user:      Murugan Vairavel <murugan at multicorewareinc.com>
date:      Fri Sep 26 10:48:07 2014 +0530
description:
asm: avx2 assembly code for idct32x32

diffstat:

 source/Lib/TLibCommon/TComDataCU.cpp |   50 +-
 source/Lib/TLibCommon/TComPicYuv.h   |    2 +
 source/Lib/TLibCommon/TComRom.cpp    |    8 +-
 source/Lib/TLibCommon/TComRom.h      |   10 +-
 source/common/deblock.cpp            |  393 +++++++++++------------
 source/common/deblock.h              |   18 +-
 source/common/frame.h                |    3 +-
 source/common/slice.cpp              |    8 +-
 source/common/x86/asm-primitives.cpp |    4 +
 source/common/x86/dct8.asm           |  594 +++++++++++++++++++++++++++++++++++
 source/common/x86/dct8.h             |   15 +-
 source/encoder/analysis.cpp          |  250 ++++++--------
 source/encoder/encoder.cpp           |    2 +-
 source/encoder/entropy.cpp           |   23 +-
 source/encoder/frameencoder.cpp      |    5 +-
 source/encoder/frameencoder.h        |    6 +-
 source/encoder/framefilter.cpp       |    8 +-
 source/encoder/framefilter.h         |    2 +-
 source/encoder/sao.cpp               |    3 +-
 source/encoder/search.cpp            |   60 +-
 20 files changed, 1004 insertions(+), 460 deletions(-)

diffs (truncated from 2765 to 300 lines):

diff -r 7dccbbed0349 -r 4b18a27b52ac source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Fri Sep 26 10:48:07 2014 +0530
@@ -320,7 +320,7 @@ void TComDataCU::initCU(Frame* pic, uint
     m_totalBits        = 0;
     m_mvBits           = 0;
     m_coeffBits        = 0;
-    m_numPartitions    = pic->getNumPartInCU();
+    m_numPartitions    = NUM_CU_PARTITIONS;
     char* qp           = pic->getCU(getAddr())->getQP();
     m_baseQp           = pic->getCU(getAddr())->m_baseQp;
     for (int i = 0; i < 4; i++)
@@ -772,7 +772,7 @@ TComDataCU* TComDataCU::getPUAbove(uint3
     if (planarAtLCUBoundary)
         return NULL;
 
-    aPartUnitIdx = g_rasterToZscan[absPartIdx + m_pic->getNumPartInCU() - numPartInCUSize];
+    aPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - numPartInCUSize];
     return m_cuAbove;
 }
 
@@ -797,7 +797,7 @@ TComDataCU* TComDataCU::getPUAboveLeft(u
                 return this;
             }
         }
-        alPartUnitIdx = g_rasterToZscan[absPartIdx + m_pic->getNumPartInCU() - numPartInCUSize - 1];
+        alPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - numPartInCUSize - 1];
         return m_cuAbove;
     }
 
@@ -807,7 +807,7 @@ TComDataCU* TComDataCU::getPUAboveLeft(u
         return m_cuLeft;
     }
 
-    alPartUnitIdx = g_rasterToZscan[m_pic->getNumPartInCU() - 1];
+    alPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - 1];
     return m_cuAboveLeft;
 }
 
@@ -839,7 +839,7 @@ TComDataCU* TComDataCU::getPUAboveRight(
             }
             return NULL;
         }
-        arPartUnitIdx = g_rasterToZscan[absPartIdxRT + m_pic->getNumPartInCU() - numPartInCUSize + 1];
+        arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - numPartInCUSize + 1];
         return m_cuAbove;
     }
 
@@ -848,7 +848,7 @@ TComDataCU* TComDataCU::getPUAboveRight(
         return NULL;
     }
 
-    arPartUnitIdx = g_rasterToZscan[m_pic->getNumPartInCU() - numPartInCUSize];
+    arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - numPartInCUSize];
     return m_cuAboveRight;
 }
 
@@ -960,7 +960,7 @@ TComDataCU* TComDataCU::getPUAboveRightA
             }
             return NULL;
         }
-        arPartUnitIdx = g_rasterToZscan[absPartIdxRT + m_pic->getNumPartInCU() - numPartInCUSize + partUnitOffset];
+        arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - numPartInCUSize + partUnitOffset];
         if (m_cuAbove == NULL || m_cuAbove->m_slice == NULL)
         {
             return NULL;
@@ -973,7 +973,7 @@ TComDataCU* TComDataCU::getPUAboveRightA
         return NULL;
     }
 
-    arPartUnitIdx = g_rasterToZscan[m_pic->getNumPartInCU() - numPartInCUSize + partUnitOffset - 1];
+    arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - numPartInCUSize + partUnitOffset - 1];
     if ((m_cuAboveRight == NULL || m_cuAboveRight->m_slice == NULL ||
          (m_cuAboveRight->getAddr()) > getAddr()))
     {
@@ -1074,7 +1074,7 @@ char TComDataCU::getLastCodedQP(uint32_t
         else if (getAddr() > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled &&
                                     getAddr() % m_pic->getFrameWidthInCU() == 0))
         {
-            return m_pic->getCU(getAddr() - 1)->getLastCodedQP(m_pic->getNumPartInCU());
+            return m_pic->getCU(getAddr() - 1)->getLastCodedQP(NUM_CU_PARTITIONS);
         }
         else
         {
@@ -1214,7 +1214,7 @@ uint32_t TComDataCU::getCtxSkipFlag(uint
 
 void TComDataCU::clearCbf(uint32_t absPartIdx, uint32_t depth)
 {
-    uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
 
     memset(m_cbf[0] + absPartIdx, 0, sizeof(uint8_t) * curPartNum);
     memset(m_cbf[1] + absPartIdx, 0, sizeof(uint8_t) * curPartNum);
@@ -1223,7 +1223,7 @@ void TComDataCU::clearCbf(uint32_t absPa
 
 void TComDataCU::setCbfSubParts(uint32_t cbf, TextType ttype, uint32_t absPartIdx, uint32_t depth)
 {
-    uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
 
     memset(m_cbf[ttype] + absPartIdx, cbf, sizeof(uint8_t) * curPartNum);
 }
@@ -1236,14 +1236,14 @@ void TComDataCU::setCbfPartRange(uint32_
 void TComDataCU::setDepthSubParts(uint32_t depth)
 {
     /*All 4x4 partitions in current CU have the CU depth saved*/
-    uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
 
     memset(m_depth, depth, sizeof(uint8_t) * curPartNum);
 }
 
 bool TComDataCU::isFirstAbsZorderIdxInDepth(uint32_t absPartIdx, uint32_t depth)
 {
-    uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
 
     return ((m_absIdxInLCU + absPartIdx) & (curPartNum - 1)) == 0;
 }
@@ -1251,29 +1251,29 @@ bool TComDataCU::isFirstAbsZorderIdxInDe
 void TComDataCU::setPartSizeSubParts(PartSize mode, uint32_t absPartIdx, uint32_t depth)
 {
     X265_CHECK(sizeof(*m_partSizes) == 1, "size check failure\n");
-    memset(m_partSizes + absPartIdx, mode, m_pic->getNumPartInCU() >> (depth << 1));
+    memset(m_partSizes + absPartIdx, mode, NUM_CU_PARTITIONS >> (depth << 1));
 }
 
 void TComDataCU::setCUTransquantBypassSubParts(bool flag, uint32_t absPartIdx, uint32_t depth)
 {
-    memset(m_cuTransquantBypass + absPartIdx, flag, m_pic->getNumPartInCU() >> (depth << 1));
+    memset(m_cuTransquantBypass + absPartIdx, flag, NUM_CU_PARTITIONS >> (depth << 1));
 }
 
 void TComDataCU::setSkipFlagSubParts(bool skip, uint32_t absPartIdx, uint32_t depth)
 {
     X265_CHECK(sizeof(*m_skipFlag) == 1, "size check failure\n");
-    memset(m_skipFlag + absPartIdx, skip, m_pic->getNumPartInCU() >> (depth << 1));
+    memset(m_skipFlag + absPartIdx, skip, NUM_CU_PARTITIONS >> (depth << 1));
 }
 
 void TComDataCU::setPredModeSubParts(PredMode eMode, uint32_t absPartIdx, uint32_t depth)
 {
     X265_CHECK(sizeof(*m_predModes) == 1, "size check failure\n");
-    memset(m_predModes + absPartIdx, eMode, m_pic->getNumPartInCU() >> (depth << 1));
+    memset(m_predModes + absPartIdx, eMode, NUM_CU_PARTITIONS >> (depth << 1));
 }
 
 void TComDataCU::setQPSubCUs(int qp, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool &foundNonZeroCbf)
 {
-    uint32_t curPartNumb = m_pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curPartNumb = NUM_CU_PARTITIONS >> (depth << 1);
     uint32_t curPartNumQ = curPartNumb >> 2;
 
     if (!foundNonZeroCbf)
@@ -1301,7 +1301,7 @@ void TComDataCU::setQPSubCUs(int qp, TCo
 
 void TComDataCU::setQPSubParts(int qp, uint32_t absPartIdx, uint32_t depth)
 {
-    uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
 
     for (uint32_t scuIdx = absPartIdx; scuIdx < absPartIdx + curPartNum; scuIdx++)
     {
@@ -1311,7 +1311,7 @@ void TComDataCU::setQPSubParts(int qp, u
 
 void TComDataCU::setLumaIntraDirSubParts(uint32_t dir, uint32_t absPartIdx, uint32_t depth)
 {
-    uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
 
     memset(m_lumaIntraDir + absPartIdx, dir, sizeof(uint8_t) * curPartNum);
 }
@@ -1321,7 +1321,7 @@ void TComDataCU::setSubPart(T param, T* 
 {
     X265_CHECK(sizeof(T) == 1, "size check failure\n"); // Using memset() works only for types of size 1
 
-    uint32_t curPartNumQ = (m_pic->getNumPartInCU() >> (2 * cuDepth)) >> 2;
+    uint32_t curPartNumQ = (NUM_CU_PARTITIONS >> (2 * cuDepth)) >> 2;
     switch (m_partSizes[cuAddr])
     {
     case SIZE_2Nx2N:
@@ -1416,7 +1416,7 @@ void TComDataCU::setSubPart(T param, T* 
 
 void TComDataCU::setChromIntraDirSubParts(uint32_t dir, uint32_t absPartIdx, uint32_t depth)
 {
-    uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
 
     memset(m_chromaIntraDir + absPartIdx, dir, sizeof(uint8_t) * curPartNum);
 }
@@ -1428,14 +1428,14 @@ void TComDataCU::setInterDirSubParts(uin
 
 void TComDataCU::setTrIdxSubParts(uint32_t trIdx, uint32_t absPartIdx, uint32_t depth)
 {
-    uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
 
     memset(m_trIdx + absPartIdx, trIdx, sizeof(uint8_t) * curPartNum);
 }
 
 void TComDataCU::setTransformSkipSubParts(uint32_t useTransformSkipY, uint32_t useTransformSkipU, uint32_t useTransformSkipV, uint32_t absPartIdx, uint32_t depth)
 {
-    uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
 
     memset(m_transformSkip[0] + absPartIdx, useTransformSkipY, sizeof(uint8_t) * curPartNum);
     memset(m_transformSkip[1] + absPartIdx, useTransformSkipU, sizeof(uint8_t) * curPartNum);
@@ -1444,7 +1444,7 @@ void TComDataCU::setTransformSkipSubPart
 
 void TComDataCU::setTransformSkipSubParts(uint32_t useTransformSkip, TextType ttype, uint32_t absPartIdx, uint32_t depth)
 {
-    uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
 
     memset(m_transformSkip[ttype] + absPartIdx, useTransformSkip, sizeof(uint8_t) * curPartNum);
 }
diff -r 7dccbbed0349 -r 4b18a27b52ac source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Fri Sep 26 10:48:07 2014 +0530
@@ -155,6 +155,8 @@ public:
 
     pixel*  getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
 
+    int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
+
     uint32_t getCUHeight(int rowNum);
 
     void  copyFromPicture(const x265_picture&, int padx, int pady);
diff -r 7dccbbed0349 -r 4b18a27b52ac source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.cpp	Fri Sep 26 10:48:07 2014 +0530
@@ -115,10 +115,10 @@ uint32_t g_maxLog2CUSize = MAX_LOG2_CU_S
 uint32_t g_maxCUSize     = MAX_CU_SIZE;
 uint32_t g_maxFullDepth  = NUM_FULL_DEPTH - 1;
 uint32_t g_maxCUDepth    = NUM_CU_DEPTH - 1;
-uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
-uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
+uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] = { 0, };
+uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] = { 0, };
 
-const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W] =
+const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS] =
 {
     0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
     16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
@@ -138,7 +138,7 @@ const uint8_t g_zscanToPelX[MAX_NUM_SPU_
     48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60
 };
 
-const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W] =
+const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS] =
 {
     0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
     0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
diff -r 7dccbbed0349 -r 4b18a27b52ac source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.h	Fri Sep 26 10:48:07 2014 +0530
@@ -54,6 +54,8 @@ namespace x265 {
 #define UNIT_SIZE               (1 << LOG2_UNIT_SIZE)       // unit size of CU partition
 #define TMVP_UNIT_MASK          0xF0                        // mask for mapping index to CompressMV field
 
+#define MAX_NUM_PARTITIONS      256
+
 #define MIN_PU_SIZE             4
 #define MIN_TU_SIZE             4
 #define MAX_NUM_SPU_W           (MAX_CU_SIZE / MIN_PU_SIZE) // maximum number of SPU in horizontal line
@@ -75,15 +77,15 @@ extern const uint8_t g_chromaScale[chrom
 extern const uint8_t g_chroma422IntraAngleMappingTable[36];
 
 // flexible conversion from relative to absolute index
-extern uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
-extern uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
+extern uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS];
+extern uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS];
 
 void initZscanToRaster(uint32_t maxFullDepth, uint32_t depth, uint32_t startVal, uint32_t*& curIdx);
 void initRasterToZscan(uint32_t maxFullDepth);
 
 // conversion of partition index to picture pel position
-extern const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
-extern const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
+extern const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS];
+extern const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS];
 
 // global variable (LCU width/height, max. CU depth)
 extern uint32_t g_maxLog2CUSize;
diff -r 7dccbbed0349 -r 4b18a27b52ac source/common/deblock.cpp
--- a/source/common/deblock.cpp	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/common/deblock.cpp	Fri Sep 26 10:48:07 2014 +0530
@@ -32,23 +32,24 @@ using namespace x265;
 #define DEBLOCK_SMALLEST_BLOCK  8
 #define DEFAULT_INTRA_TC_OFFSET 2
 
-void Deblock::deblockCTU(TComDataCU* cu, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::deblockCTU(TComDataCU* cu, int32_t dir)
 {
+    uint8_t blockingStrength[MAX_NUM_PARTITIONS];
+
     memset(blockingStrength, 0, sizeof(uint8_t) * m_numPartitions);
-    memset(edgeFilter, 0, sizeof(bool) * m_numPartitions);


More information about the x265-commits mailing list