[x265] refine deblocking filter

Satoshi Nakagawa nakagawa424 at oki.com
Wed Sep 24 11:19:03 CEST 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1411549726 -32400
#      Wed Sep 24 18:08:46 2014 +0900
# Node ID 9f96fc8374d834d424190b0b1581054996985b67
# Parent  b2b7072ddbf73085d457bd6a71bca946e505dea8
refine deblocking filter

diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h	Wed Sep 24 11:48:15 2014 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Wed Sep 24 18:08:46 2014 +0900
@@ -155,6 +155,8 @@
 
     pixel*  getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
 
+    int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
+
     uint32_t getCUHeight(int rowNum);
 
     void  copyFromPicture(const x265_picture&, int padx, int pady);
diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.cpp
--- a/source/common/deblock.cpp	Wed Sep 24 11:48:15 2014 +0530
+++ b/source/common/deblock.cpp	Wed Sep 24 18:08:46 2014 +0900
@@ -48,7 +48,7 @@
         return;
 
     Frame* pic = cu->m_pic;
-    uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curNumParts = m_numPartitions >> (depth * 2);
 
     if (cu->getDepth(absZOrderIdx) > depth)
     {
@@ -56,35 +56,34 @@
         uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples  - cu->getCUPelX();
         uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->getCUPelY();
         for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
-        {
             if (g_zscanToPelX[absZOrderIdx] < xmax && g_zscanToPelY[absZOrderIdx] < ymax)
                 deblockCU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
-        }
         return;
     }
 
     Param params;
     setLoopfilterParam(cu, absZOrderIdx, &params);
-    setEdgefilterTU(cu, absZOrderIdx, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
+    setEdgefilterTU(cu, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
     setEdgefilterPU(cu, absZOrderIdx, dir, &params, edgeFilter, blockingStrength);
 
     for (uint32_t partIdx = absZOrderIdx; partIdx < absZOrderIdx + curNumParts; partIdx++)
     {
-        uint32_t bsCheck = (dir == EDGE_VER ? !(partIdx & 1) : !(partIdx & 2));
+        uint32_t bsCheck = !(partIdx & (1 << dir));
 
-        if (edgeFilter[partIdx] && bsCheck)
+        if (bsCheck && edgeFilter[partIdx])
             getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength);
     }
 
-    uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
+    const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
     uint32_t sizeInPU = pic->getNumPartInCUSize() >> depth;
     uint32_t shiftFactor = (dir == EDGE_VER) ? cu->getHorzChromaShift() : cu->getVertChromaShift();
-    const bool alwaysDoChroma = cu->getChromaFormat() == X265_CSP_I444;
-
+    uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
+    uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absZOrderIdx] : g_zscanToPelY[absZOrderIdx]) >> LOG2_UNIT_SIZE;
+
     for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
     {
         edgeFilterLuma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
-        if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE)))
+        if (!((e0 + e) & chromaMask))
             edgeFilterChroma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
     }
 }
@@ -115,66 +114,60 @@
     }
 }
 
-void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
 {
     if (cu->getTransformIdx(absZOrderIdx) + cu->getDepth(absZOrderIdx) > (uint8_t)depth)
     {
-        const uint32_t curNumParts = cu->m_pic->getNumPartInCU() >> (depth << 1);
+        const uint32_t curNumParts = m_numPartitions >> (depth * 2);
         const uint32_t qNumParts   = curNumParts >> 2;
 
         for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
-        {
-            uint32_t nsAddr = absZOrderIdx;
-            setEdgefilterTU(cu, nsAddr, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
-        }
+            setEdgefilterTU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
         return;
     }
 
     uint32_t widthInBaseUnits  = 1 << (cu->getLog2CUSize(absZOrderIdx) - cu->getTransformIdx(absZOrderIdx) - LOG2_UNIT_SIZE);
-    setEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
+    setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
 }
 
 void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[])
 {
     const uint32_t depth = cu->getDepth(absZOrderIdx);
     const uint32_t widthInBaseUnits  = cu->m_pic->getNumPartInCUSize() >> depth;
-    const uint32_t hWidthInBaseUnits  = widthInBaseUnits  >> 1;
-    const uint32_t qWidthInBaseUnits  = widthInBaseUnits  >> 2;
+    const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
+    const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
 
     setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, (dir == EDGE_VER ? params->leftEdge : params->topEdge), edgeFilter, blockingStrength);
 
-    int32_t mode = cu->getPartitionSize(absZOrderIdx);
-    switch (mode)
+    switch (cu->getPartitionSize(absZOrderIdx))
     {
     case SIZE_2NxN:
+        if (EDGE_HOR == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
+        break;
     case SIZE_Nx2N:
-        {
-            const int32_t realDir = (mode == SIZE_2NxN ? EDGE_HOR : EDGE_VER);
-            if (realDir == dir)
-                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
-            break;
-        }
+        if (EDGE_VER == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
+        break;
     case SIZE_NxN:
-        {
-            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
-            break;
-        }
+        setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
+        break;
     case SIZE_2NxnU:
+        if (EDGE_HOR == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
+        break;
     case SIZE_nLx2N:
-        {
-            const int32_t realDir = (mode == SIZE_2NxnU ? EDGE_HOR : EDGE_VER);
-            if (realDir == dir)
-                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
-            break;
-        }
+        if (EDGE_VER == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
+        break;
     case SIZE_2NxnD:
+        if (EDGE_HOR == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
+        break;
     case SIZE_nRx2N:
-        {
-            const int32_t realDir = (mode == SIZE_2NxnD ? EDGE_HOR : EDGE_VER);
-            if (realDir == dir)
-                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
-            break;
-        }
+        if (EDGE_VER == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
+        break;
 
     case SIZE_2Nx2N:
     default:
@@ -338,15 +331,15 @@
     return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]);
 }
 
-static inline bool useStrongFiltering(int32_t offset, int32_t d, int32_t beta, int32_t tc, pixel* src)
+static inline bool useStrongFiltering(int32_t offset, int32_t beta, int32_t tc, pixel* src)
 {
+    int16_t m0     = (int16_t)src[-offset * 4];
+    int16_t m3     = (int16_t)src[-offset];
     int16_t m4     = (int16_t)src[0];
-    int16_t m3     = (int16_t)src[-offset];
     int16_t m7     = (int16_t)src[offset * 3];
-    int16_t m0     = (int16_t)src[-offset * 4];
     int32_t strong = abs(m0 - m3) + abs(m7 - m4);
 
-    return (strong < (beta >> 3)) && (d < (beta >> 2)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
+    return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
 }
 
 /* Deblocking for the luminance component with strong or weak filter
@@ -362,63 +355,61 @@
 static inline void pelFilterLuma(pixel* src, int32_t offset, int32_t tc, bool sw, bool partPNoFilter, bool partQNoFilter,
                                  int32_t thrCut, bool filterSecondP, bool filterSecondQ)
 {
-    int32_t delta;
-
+    int16_t m1  = (int16_t)src[-offset * 3];
+    int16_t m2  = (int16_t)src[-offset * 2];
+    int16_t m3  = (int16_t)src[-offset];
     int16_t m4  = (int16_t)src[0];
-    int16_t m3  = (int16_t)src[-offset];
     int16_t m5  = (int16_t)src[offset];
-    int16_t m2  = (int16_t)src[-offset * 2];
     int16_t m6  = (int16_t)src[offset * 2];
-    int16_t m1  = (int16_t)src[-offset * 3];
-    int16_t m7  = (int16_t)src[offset * 3];
-    int16_t m0  = (int16_t)src[-offset * 4];
 
     if (sw)
     {
-        src[-offset]     = (pixel)Clip3(m3 - 2 * tc, m3 + 2 * tc, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3));
-        src[0]           = (pixel)Clip3(m4 - 2 * tc, m4 + 2 * tc, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3));
-        src[-offset * 2] = (pixel)Clip3(m2 - 2 * tc, m2 + 2 * tc, ((m1 + m2 + m3 + m4 + 2) >> 2));
-        src[offset]      = (pixel)Clip3(m5 - 2 * tc, m5 + 2 * tc, ((m3 + m4 + m5 + m6 + 2) >> 2));
-        src[-offset * 3] = (pixel)Clip3(m1 - 2 * tc, m1 + 2 * tc, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3));
-        src[offset * 2]  = (pixel)Clip3(m6 - 2 * tc, m6 + 2 * tc, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3));
+        int16_t m0  = (int16_t)src[-offset * 4];
+        int16_t m7  = (int16_t)src[offset * 3];
+        int32_t tc2 = 2 * tc;
+        if (!partPNoFilter)
+        {
+            src[-offset * 3] = (pixel)(Clip3(-tc2, tc2, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1);
+            src[-offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2);
+            src[-offset]     = (pixel)(Clip3(-tc2, tc2, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3);
+        }
+        if (!partQNoFilter)
+        {
+            src[0]           = (pixel)(Clip3(-tc2, tc2, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4);
+            src[offset]      = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5);
+            src[offset * 2]  = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
+        }
     }
     else
     {
         /* Weak filter */
-        delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
+        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
 
         if (abs(delta) < thrCut)
         {
             delta = Clip3(-tc, tc, delta);
-            src[-offset] = Clip(m3 + delta);
-            src[0] = Clip(m4 - delta);
 
             int32_t tc2 = tc >> 1;
-            if (filterSecondP)
+            if (!partPNoFilter)
             {
-                int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
-                src[-offset * 2] = Clip(m2 + delta1);
+                src[-offset] = Clip(m3 + delta);
+                if (filterSecondP)
+                {
+                    int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
+                    src[-offset * 2] = Clip(m2 + delta1);
+                }
             }
-            if (filterSecondQ)
+            if (!partQNoFilter)
             {
-                int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
-                src[offset] = Clip(m5 + delta2);
+                src[0] = Clip(m4 - delta);
+                if (filterSecondQ)
+                {
+                    int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
+                    src[offset] = Clip(m5 + delta2);
+                }
             }
         }
     }
-
-    if (partPNoFilter)
-    {
-        src[-offset] = (pixel)m3;
-        src[-offset * 2] = (pixel)m2;
-        src[-offset * 3] = (pixel)m1;
-    }
-    if (partQNoFilter)
-    {
-        src[0] = (pixel)m4;
-        src[offset] = (pixel)m5;
-        src[offset * 2] = (pixel)m6;
-    }
 }
 
 /* Deblocking of one line/column for the chrominance component
@@ -429,34 +420,26 @@
  * \param partQNoFilter  indicator to disable filtering on partQ */
 static inline void pelFilterChroma(pixel* src, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
 {
-    int32_t delta;
+    int16_t m2  = (int16_t)src[-offset * 2];
+    int16_t m3  = (int16_t)src[-offset];
+    int16_t m4  = (int16_t)src[0];
+    int16_t m5  = (int16_t)src[offset];
 
-    int16_t m4  = (int16_t)src[0];
-    int16_t m3  = (int16_t)src[-offset];
-    int16_t m5  = (int16_t)src[offset];
-    int16_t m2  = (int16_t)src[-offset * 2];
-
-    delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
-    src[-offset] = Clip(m3 + delta);
-    src[0] = Clip(m4 - delta);
-
-    if (partPNoFilter)
-        src[-offset] = (pixel)m3;
-    if (partQNoFilter)
-        src[0] = (pixel)m4;
+    int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
+    if (!partPNoFilter)
+        src[-offset] = Clip(m3 + delta);
+    if (!partQNoFilter)
+        src[0] = Clip(m4 - delta);
 }
 
 void Deblock::edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
 {
     TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
     pixel* src = reconYuv->getLumaAddr(cu->getAddr(), absZOrderIdx);
-    pixel* tmpsrc = src;
 
     int32_t stride = reconYuv->getStride();
     uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
 
-    uint32_t blocksInPart = (LOG2_UNIT_SIZE - 2) > 0 ? 1 << (LOG2_UNIT_SIZE - 2) : 1;
-    uint32_t bsAbsIdx = 0, bs = 0;
     int32_t offset, srcStep;
 
     bool  partPNoFilter = false;
@@ -472,20 +455,20 @@
     {
         offset = 1;
         srcStep = stride;
-        tmpsrc += (edge << LOG2_UNIT_SIZE);
+        src += (edge << LOG2_UNIT_SIZE);
     }
     else // (dir == EDGE_HOR)
     {
         offset = stride;
         srcStep = 1;
-        tmpsrc += (edge << LOG2_UNIT_SIZE) * stride;
+        src += (edge << LOG2_UNIT_SIZE) * stride;
     }
 
     for (uint32_t idx = 0; idx < numParts; idx++)
     {
-        uint32_t partOffset = idx << LOG2_UNIT_SIZE;
-        bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
-        bs = blockingStrength[bsAbsIdx];
+        uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
+        uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
+        uint32_t bs = blockingStrength[bsAbsIdx];
         if (bs)
         {
             int32_t qpQ = cu->getQP(bsAbsIdx);
@@ -499,29 +482,25 @@
 
             int32_t qpP = cuP->getQP(partP);
             int32_t qp = (qpP + qpQ + 1) >> 1;
-            int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
 
-            int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
             int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset);
 
-            int32_t tc = s_tcTable[indexTC] * bitdepthScale;
-            int32_t beta = s_betaTable[indexB] * bitdepthScale;
-            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
-            int32_t thrCut = tc * 10;
+            const int32_t bitdepthShift = X265_DEPTH - 8;
+            int32_t beta = s_betaTable[indexB] << bitdepthShift;
 
-            for (uint32_t blkIdx = 0; blkIdx < blocksInPart; blkIdx++)
+            int32_t dp0 = calcDP(src + srcStep * (unitOffset + 0), offset);
+            int32_t dq0 = calcDQ(src + srcStep * (unitOffset + 0), offset);
+            int32_t dp3 = calcDP(src + srcStep * (unitOffset + 3), offset);
+            int32_t dq3 = calcDQ(src + srcStep * (unitOffset + 3), offset);
+            int32_t d0 = dp0 + dq0;
+            int32_t d3 = dp3 + dq3;
+
+            int32_t dp = dp0 + dp3;
+            int32_t dq = dq0 + dq3;
+            int32_t d =  d0 + d3;
+
+            if (d < beta)
             {
-                int32_t dp0 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
-                int32_t dq0 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
-                int32_t dp3 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
-                int32_t dq3 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
-                int32_t d0 = dp0 + dq0;
-                int32_t d3 = dp3 + dq3;
-
-                int32_t dp = dp0 + dp3;
-                int32_t dq = dq0 + dq3;
-                int32_t d =  d0 + d3;
-
                 if (cu->m_slice->m_pps->bTransquantBypassEnabled)
                 {
                     // check if each of PUs is lossless coded
@@ -529,17 +508,21 @@
                     partQNoFilter = cuQ->getCUTransquantBypass(partQ);
                 }
 
-                if (d < beta)
-                {
-                    bool filterP = (dp < sideThreshold);
-                    bool filterQ = (dq < sideThreshold);
+                int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
+                int32_t tc = s_tcTable[indexTC] << bitdepthShift;
+                int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
+                int32_t thrCut = tc * 10;
 
-                    bool sw = useStrongFiltering(offset, 2 * d0, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0))
-                           && useStrongFiltering(offset, 2 * d3, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3));
+                bool filterP = (dp < sideThreshold);
+                bool filterQ = (dq < sideThreshold);
 
-                    for (int32_t i = 0; i < DEBLOCK_SMALLEST_BLOCK / 2; i++)
-                        pelFilterLuma(tmpsrc + srcStep * (partOffset + blkIdx * 4 + i), offset, tc, sw, partPNoFilter, partQNoFilter, thrCut, filterP, filterQ);
-                }
+                bool sw = (2 * d0 < (beta >> 2) &&
+                           2 * d3 < (beta >> 2) &&
+                           useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 0)) &&
+                           useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 3)));
+
+                for (int32_t i = 0; i < UNIT_SIZE; i++)
+                    pelFilterLuma(src + srcStep * (unitOffset + i), offset, tc, sw, partPNoFilter, partQNoFilter, thrCut, filterP, filterQ);
             }
         }
     }
@@ -548,17 +531,7 @@
 void Deblock::edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
 {
     int32_t chFmt = cu->getChromaFormat();
-    TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
-    int32_t stride = reconYuv->getCStride();
-    pixel* srcCb = reconYuv->getCbAddr(cu->getAddr(), absZOrderIdx);
-    pixel* srcCr = reconYuv->getCrAddr(cu->getAddr(), absZOrderIdx);
-    uint32_t log2UnitSizeH = LOG2_UNIT_SIZE - cu->getHorzChromaShift();
-    uint32_t log2UnitSizeV = LOG2_UNIT_SIZE - cu->getVertChromaShift();
-    uint32_t sizeChromaH = 1 << log2UnitSizeH;
-    uint32_t sizeChromaV = 1 << log2UnitSizeV;
-    int32_t offset, srcStep;
-
-    const uint32_t lcuWidthInBaseUnits = cu->m_pic->getNumPartInCUSize();
+    int32_t offset, srcStep, chromaShift;
 
     bool partPNoFilter = false;
     bool partQNoFilter = false;
@@ -568,44 +541,42 @@
     TComDataCU* cuQ = cu;
     int32_t tcOffset = cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
 
-    // Vertical Position
-    uint32_t edgeNumInLCUVert = g_zscanToRaster[absZOrderIdx] % lcuWidthInBaseUnits + edge;
-    uint32_t edgeNumInLCUHor = g_zscanToRaster[absZOrderIdx] / lcuWidthInBaseUnits + edge;
+    X265_CHECK(((dir == EDGE_VER)
+                ? ((g_zscanToPelX[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getHorzChromaShift())
+                : ((g_zscanToPelY[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getVertChromaShift())) % DEBLOCK_SMALLEST_BLOCK == 0,
+               "invalid edge\n");
 
-    if ((sizeChromaH < DEBLOCK_SMALLEST_BLOCK) && (sizeChromaV < DEBLOCK_SMALLEST_BLOCK) &&
-        (((edgeNumInLCUVert % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeH)) && !dir) ||
-         ((edgeNumInLCUHor % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeV)) && dir)))
-        return;
 
-    uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
-    uint32_t bsAbsIdx;
-    uint8_t bs;
-
-    pixel* tmpSrcCb = srcCb;
-    pixel* tmpSrcCr = srcCr;
-    uint32_t loopLength;
+    TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
+    int32_t stride = reconYuv->getCStride();
+    int32_t srcOffset = reconYuv->getChromaAddrOffset(cu->getAddr(), absZOrderIdx);
 
     if (dir == EDGE_VER)
     {
+        chromaShift = cu->getVertChromaShift();
+        srcOffset += (edge << (LOG2_UNIT_SIZE - cu->getHorzChromaShift()));
         offset     = 1;
         srcStep    = stride;
-        tmpSrcCb  += (edge << log2UnitSizeH);
-        tmpSrcCr  += (edge << log2UnitSizeH);
-        loopLength = sizeChromaV;
     }
     else // (dir == EDGE_HOR)
     {
+        chromaShift = cu->getHorzChromaShift();
+        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cu->getVertChromaShift());
         offset     = stride;
         srcStep    = 1;
-        tmpSrcCb  += edge * stride << log2UnitSizeV;
-        tmpSrcCr  += edge * stride << log2UnitSizeV;
-        loopLength = sizeChromaH;
     }
 
-    for (uint32_t idx = 0; idx < numParts; idx++)
+    pixel* srcChroma[2];
+    srcChroma[0] = reconYuv->getCbAddr() + srcOffset;
+    srcChroma[1] = reconYuv->getCrAddr() + srcOffset;
+
+    uint32_t numUnits = cu->m_pic->getNumPartInCUSize() >> (depth + chromaShift);
+
+    for (uint32_t idx = 0; idx < numUnits; idx++)
     {
-        bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
-        bs = blockingStrength[bsAbsIdx];
+        uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
+        uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx << chromaShift);
+        uint32_t bs = blockingStrength[bsAbsIdx];
 
         if (bs > 1)
         {
@@ -630,7 +601,6 @@
             for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
             {
                 int32_t chromaQPOffset  = !chromaIdx ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset;
-                pixel* tmpSrcChroma = !chromaIdx ? tmpSrcCb : tmpSrcCr;
                 int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
                 if (qp >= 30)
                 {
@@ -640,12 +610,13 @@
                         qp = X265_MIN(qp, 51);
                 }
 
-                int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
-                int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset);
-                int32_t tc = s_tcTable[indexTC] * bitdepthScale;
+                int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
+                const int32_t bitdepthShift = X265_DEPTH - 8;
+                int32_t tc = s_tcTable[indexTC] << bitdepthShift;
+                pixel* srcC = srcChroma[chromaIdx];
 
-                for (uint32_t step = 0; step < loopLength; step++)
-                    pelFilterChroma(tmpSrcChroma + srcStep * (step + idx * loopLength), offset, tc, partPNoFilter, partQNoFilter);
+                for (int32_t i = 0; i < UNIT_SIZE; i++)
+                    pelFilterChroma(srcC + srcStep * (unitOffset + i), offset, tc, partPNoFilter, partQNoFilter);
             }
         }
     }
diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.h
--- a/source/common/deblock.h	Wed Sep 24 11:48:15 2014 +0530
+++ b/source/common/deblock.h	Wed Sep 24 18:08:46 2014 +0900
@@ -58,7 +58,7 @@
 
     // set filtering functions
     void setLoopfilterParam(TComDataCU* cu, uint32_t absZOrderIdx, Param *params);
-    void setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
+    void setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
     void setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[]);
     void setEdgefilterMultiple(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edgeIdx, bool value, bool edgeFilter[], uint8_t blockingStrength[], uint32_t widthInBaseUnits = 0);
 


More information about the x265-devel mailing list