[x265] refine deblocking filter

Satoshi Nakagawa nakagawa424 at oki.com
Wed Nov 5 10:15:07 CET 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1415178450 -32400
#      Wed Nov 05 18:07:30 2014 +0900
# Node ID ce18e3c8e9af1633d4c8ead10197296d0542d0e1
# Parent  2a8f3d5820a6ebe0937ce73fa81154c263df2ae9
refine deblocking filter

diff -r 2a8f3d5820a6 -r ce18e3c8e9af source/common/deblock.cpp
--- a/source/common/deblock.cpp	Tue Nov 04 09:46:14 2014 +0530
+++ b/source/common/deblock.cpp	Wed Nov 05 18:07:30 2014 +0900
@@ -33,18 +33,42 @@
 #define DEBLOCK_SMALLEST_BLOCK  8
 #define DEFAULT_INTRA_TC_OFFSET 2
 
-void Deblock::deblockCTU(CUData* cu, int32_t dir)
+void Deblock::deblockCTU(const CUData* ctu, int32_t dir)
 {
-    uint8_t blockingStrength[MAX_NUM_PARTITIONS];
+    uint8_t blockStrength[MAX_NUM_PARTITIONS];
 
-    memset(blockingStrength, 0, sizeof(uint8_t) * m_numPartitions);
+    memset(blockStrength, 0, sizeof(uint8_t) * m_numPartitions);
 
-    deblockCU(cu, 0, 0, dir, blockingStrength);
+    deblockCU(ctu, 0, 0, dir, blockStrength);
+}
+
+static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
+{
+    if (dir == Deblock::EDGE_VER)
+    {
+        if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0)
+        {
+            uint32_t    tempPartIdx;
+            const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx);
+            return tempCU ? 2 : 0;
+        }
+    }
+    else
+    {
+        if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0)
+        {
+            uint32_t    tempPartIdx;
+            const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx);
+            return tempCU ? 2 : 0;
+        }
+    }
+
+    return 0;
 }
 
 /* Deblocking filter process in CU-based (the same function as conventional's)
  * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
-void Deblock::deblockCU(CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockingStrength[])
+void Deblock::deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[])
 {
     if (cu->m_partSize[absPartIdx] == SIZE_NONE)
         return;
@@ -60,23 +84,21 @@
         uint32_t ymax = sps.picHeightInLumaSamples - cu->m_cuPelY;
         for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts)
             if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)
-                deblockCU(cu, absPartIdx, depth + 1, dir, blockingStrength);
+                deblockCU(cu, absPartIdx, depth + 1, dir, blockStrength);
         return;
     }
 
-    const uint32_t widthInBaseUnits = sps.numPartInCUSize >> depth;
-    Param params;
-    setLoopfilterParam(cu, absPartIdx, &params);
-    setEdgefilterPU(cu, absPartIdx, dir, blockingStrength, widthInBaseUnits);
-    setEdgefilterTU(cu, absPartIdx, depth, dir, blockingStrength);
-    setEdgefilterMultiple(cu, absPartIdx, dir, 0, (dir == EDGE_VER ? params.leftEdge : params.topEdge), blockingStrength, widthInBaseUnits);
+    const uint32_t numUnits  = sps.numPartInCUSize >> depth;
+    setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
+    setEdgefilterTU(cu, absPartIdx, depth, dir, blockStrength);
+    setEdgefilterMultiple(cu, absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
 
     for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + curNumParts; partIdx++)
     {
         uint32_t bsCheck = !(partIdx & (1 << dir));
 
-        if (bsCheck && blockingStrength[partIdx])
-            getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength);
+        if (bsCheck && blockStrength[partIdx])
+            blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength);
     }
 
     const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
@@ -87,34 +109,33 @@
         
     for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
     {
-        edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockingStrength);
+        edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
         if (!((e0 + e) & chromaMask))
-            edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockingStrength);
+            edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
     }
 }
 
-static inline uint32_t calcBsIdx(CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
+static inline uint32_t calcBsIdx(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
 {
-    uint32_t ctuWidthInBaseUnits = cu->m_slice->m_sps->numPartInCUSize;
+    uint32_t numPartInCUSize = cu->m_slice->m_sps->numPartInCUSize;
 
     if (dir)
-        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * ctuWidthInBaseUnits + baseUnitIdx];
+        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * numPartInCUSize + baseUnitIdx];
     else
-        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * ctuWidthInBaseUnits + edgeIdx];
+        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * numPartInCUSize + edgeIdx];
 }
 
-void Deblock::setEdgefilterMultiple(CUData* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockingStrength[], uint32_t widthInBaseUnits)
+void Deblock::setEdgefilterMultiple(const CUData* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
 {
-    const uint32_t numElem = widthInBaseUnits;
-    X265_CHECK(numElem > 0, "numElem edge filter check\n");
-    for (uint32_t i = 0; i < numElem; i++)
+    X265_CHECK(numUnits > 0, "numUnits edge filter check\n");
+    for (uint32_t i = 0; i < numUnits; i++)
     {
         const uint32_t bsidx = calcBsIdx(cu, scanIdx, dir, edgeIdx, i);
-        blockingStrength[bsidx] = value;
+        blockStrength[bsidx] = value;
     }
 }
 
-void Deblock::setEdgefilterTU(CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockingStrength[])
+void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockStrength[])
 {
     if ((uint32_t)cu->m_tuDepth[absPartIdx] + cu->m_cuDepth[absPartIdx] > depth)
     {
@@ -122,47 +143,47 @@
         const uint32_t qNumParts   = curNumParts >> 2;
 
         for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts)
-            setEdgefilterTU(cu, absPartIdx, depth + 1, dir, blockingStrength);
+            setEdgefilterTU(cu, absPartIdx, depth + 1, dir, blockStrength);
         return;
     }
 
-    uint32_t widthInBaseUnits  = 1 << (cu->m_log2CUSize[absPartIdx] - cu->m_tuDepth[absPartIdx] - LOG2_UNIT_SIZE);
-    setEdgefilterMultiple(cu, absPartIdx, dir, 0, 2, blockingStrength, widthInBaseUnits);
+    uint32_t numUnits  = 1 << (cu->m_log2CUSize[absPartIdx] - cu->m_tuDepth[absPartIdx] - LOG2_UNIT_SIZE);
+    setEdgefilterMultiple(cu, absPartIdx, dir, 0, 2, blockStrength, numUnits);
 }
 
-void Deblock::setEdgefilterPU(CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockingStrength[], uint32_t widthInBaseUnits)
+void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits)
 {
-    const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
-    const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
+    const uint32_t hNumUnits = numUnits >> 1;
+    const uint32_t qNumUnits = numUnits >> 2;
 
     switch (cu->m_partSize[absPartIdx])
     {
     case SIZE_2NxN:
         if (EDGE_HOR == dir)
-            setEdgefilterMultiple(cu, absPartIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+            setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
         break;
     case SIZE_Nx2N:
         if (EDGE_VER == dir)
-            setEdgefilterMultiple(cu, absPartIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+            setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
         break;
     case SIZE_NxN:
-        setEdgefilterMultiple(cu, absPartIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+        setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
         break;
     case SIZE_2NxnU:
         if (EDGE_HOR == dir)
-            setEdgefilterMultiple(cu, absPartIdx, dir, qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+            setEdgefilterMultiple(cu, absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
         break;
     case SIZE_nLx2N:
         if (EDGE_VER == dir)
-            setEdgefilterMultiple(cu, absPartIdx, dir, qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+            setEdgefilterMultiple(cu, absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
         break;
     case SIZE_2NxnD:
         if (EDGE_HOR == dir)
-            setEdgefilterMultiple(cu, absPartIdx, dir, widthInBaseUnits - qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+            setEdgefilterMultiple(cu, absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
         break;
     case SIZE_nRx2N:
         if (EDGE_VER == dir)
-            setEdgefilterMultiple(cu, absPartIdx, dir, widthInBaseUnits - qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+            setEdgefilterMultiple(cu, absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
         break;
 
     case SIZE_2Nx2N:
@@ -171,151 +192,65 @@
     }
 }
 
-void Deblock::setLoopfilterParam(CUData* cu, uint32_t absPartIdx, Param *params)
+uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[])
 {
-    uint32_t x = cu->m_cuPelX + g_zscanToPelX[absPartIdx];
-    uint32_t y = cu->m_cuPelY + g_zscanToPelY[absPartIdx];
+    // Calculate block index
+    uint32_t partP;
+    const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
 
-    const CUData* tempCU;
-    uint32_t    tempPartIdx;
+    // Set BS for Intra MB : BS = 2
+    if (cuP->isIntra(partP) || cuQ->isIntra(partQ))
+        return 2;
 
-    if (!x)
-        params->leftEdge = 0;
-    else
+    // Set BS for not Intra MB : BS = 1 or 0
+    if (blockStrength[partQ] > 1 &&
+        (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) ||
+         cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP])))
+        return 1;
+
+    static const MV zeroMv(0, 0);
+    const Slice* const sliceQ = cuQ->m_slice;
+    const Slice* const sliceP = cuP->m_slice;
+
+    const Frame* refP0 = sliceP->getRefPic(0, cuP->m_refIdx[0][partP]);
+    const Frame* refQ0 = sliceQ->getRefPic(0, cuQ->m_refIdx[0][partQ]);
+    const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
+    const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
+
+    if (sliceQ->isInterP() && sliceP->isInterP())
     {
-        tempCU = cu->getPULeft(tempPartIdx, absPartIdx);
-        if (tempCU)
-            params->leftEdge = 2;
-        else
-            params->leftEdge = 0;
+        return ((refP0 != refQ0) ||
+                (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0;
     }
 
-    if (!y)
-        params->topEdge = 0;
-    else
+    // (sliceQ->isInterB() || sliceP->isInterB())
+    const Frame* refP1 = sliceP->getRefPic(1, cuP->m_refIdx[1][partP]);
+    const Frame* refQ1 = sliceQ->getRefPic(1, cuQ->m_refIdx[1][partQ]);
+    const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
+    const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
+
+    if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0)))
     {
-        tempCU = cu->getPUAbove(tempPartIdx, absPartIdx);
-        if (tempCU)
-            params->topEdge = 2;
-        else
-            params->topEdge = 0;
-    }
-}
-
-void Deblock::getBoundaryStrengthSingle(CUData* cu, int32_t dir, uint32_t absPartIdx, uint8_t blockingStrength[])
-{
-    const Slice* const slice = cu->m_slice;
-    const uint32_t partQ = absPartIdx;
-    CUData* const cuQ = cu;
-
-    uint32_t partP;
-    const CUData* cuP;
-    uint8_t bs = 0;
-
-    // Calculate block index
-    if (dir == EDGE_VER)
-        cuP = cuQ->getPULeft(partP, partQ);
-    else // (dir == EDGE_HOR)
-        cuP = cuQ->getPUAbove(partP, partQ);
-
-    // Set BS for Intra MB : BS = 4 or 3
-    if (cuP->isIntra(partP) || cuQ->isIntra(partQ))
-        bs = 2;
-
-    // Set BS for not Intra MB : BS = 2 or 1 or 0
-    if (!cuP->isIntra(partP) && !cuQ->isIntra(partQ))
-    {
-        uint32_t nsPartQ = partQ;
-        uint32_t nsPartP = partP;
-
-        if (blockingStrength[absPartIdx] > 1 &&
-            (cuQ->getCbf(nsPartQ, TEXT_LUMA, cuQ->m_tuDepth[nsPartQ]) ||
-             cuP->getCbf(nsPartP, TEXT_LUMA, cuP->m_tuDepth[nsPartP])))
-            bs = 1;
-        else
+        if (refP0 != refP1) // Different L0 & L1
         {
-            if (dir == EDGE_HOR)
-                cuP = cuQ->getPUAbove(partP, partQ);
-
-            if (slice->isInterB() || cuP->m_slice->isInterB())
-            {
-                int32_t refIdx;
-                Frame *refP0, *refP1, *refQ0, *refQ1;
-                refIdx = cuP->m_refIdx[0][partP];
-                refP0 = (refIdx < 0) ? NULL : cuP->m_slice->m_refPicList[0][refIdx];
-                refIdx = cuP->m_refIdx[1][partP];
-                refP1 = (refIdx < 0) ? NULL : cuP->m_slice->m_refPicList[1][refIdx];
-                refIdx = cuQ->m_refIdx[0][partQ];
-                refQ0 = (refIdx < 0) ? NULL : slice->m_refPicList[0][refIdx];
-                refIdx = cuQ->m_refIdx[1][partQ];
-                refQ1 = (refIdx < 0) ? NULL : slice->m_refPicList[1][refIdx];
-
-                MV mvp0 = cuP->m_mv[0][partP];
-                MV mvp1 = cuP->m_mv[1][partP];
-                MV mvq0 = cuQ->m_mv[0][partQ];
-                MV mvq1 = cuQ->m_mv[1][partQ];
-
-                if (!refP0) mvp0 = 0;
-                if (!refP1) mvp1 = 0;
-                if (!refQ0) mvq0 = 0;
-                if (!refQ1) mvq1 = 0;
-
-                if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0)))
-                {
-                    if (refP0 != refP1) // Different L0 & L1
-                    {
-                        if (refP0 == refQ0)
-                        {
-                            bs  = ((abs(mvq0.x - mvp0.x) >= 4) ||
-                                   (abs(mvq0.y - mvp0.y) >= 4) ||
-                                   (abs(mvq1.x - mvp1.x) >= 4) ||
-                                   (abs(mvq1.y - mvp1.y) >= 4)) ? 1 : 0;
-                        }
-                        else
-                        {
-                            bs  = ((abs(mvq1.x - mvp0.x) >= 4) ||
-                                   (abs(mvq1.y - mvp0.y) >= 4) ||
-                                   (abs(mvq0.x - mvp1.x) >= 4) ||
-                                   (abs(mvq0.y - mvp1.y) >= 4)) ? 1 : 0;
-                        }
-                    }
-                    else // Same L0 & L1
-                    {
-                        bs  = ((abs(mvq0.x - mvp0.x) >= 4) ||
-                               (abs(mvq0.y - mvp0.y) >= 4) ||
-                               (abs(mvq1.x - mvp1.x) >= 4) ||
-                               (abs(mvq1.y - mvp1.y) >= 4)) &&
-                              ((abs(mvq1.x - mvp0.x) >= 4) ||
-                               (abs(mvq1.y - mvp0.y) >= 4) ||
-                               (abs(mvq0.x - mvp1.x) >= 4) ||
-                               (abs(mvq0.y - mvp1.y) >= 4)) ? 1 : 0;
-                    }
-                }
-                else // for all different Ref_Idx
-                    bs = 1;
-            }
-            else // slice->isInterP()
-            {
-                int32_t refIdx;
-                Frame *refp0, *refq0;
-                refIdx = cuP->m_refIdx[0][partP];
-                refp0 = (refIdx < 0) ? NULL : cuP->m_slice->m_refPicList[0][refIdx];
-                refIdx = cuQ->m_refIdx[0][partQ];
-                refq0 = (refIdx < 0) ? NULL : slice->m_refPicList[0][refIdx];
-                MV mvp0 = cuP->m_mv[0][partP];
-                MV mvq0 = cuQ->m_mv[0][partQ];
-
-                if (!refp0) mvp0 = 0;
-                if (!refq0) mvq0 = 0;
-
-                bs = ((refp0 != refq0) ||
-                      (abs(mvq0.x - mvp0.x) >= 4) ||
-                      (abs(mvq0.y - mvp0.y) >= 4)) ? 1 : 0;
-            }
+            if (refP0 == refQ0)
+                return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
+                        (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0;
+            else
+                return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
+                        (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0;
+        }
+        else // Same L0 & L1
+        {
+            return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
+                     (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) &&
+                    ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
+                     (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0;
         }
     }
-
-    blockingStrength[absPartIdx] = bs;
+        
+    // for all different Ref_Idx
+    return 1;
 }
 
 static inline int32_t calcDP(pixel* src, intptr_t offset)
@@ -340,46 +275,45 @@
 }
 
 /* Deblocking for the luminance component with strong or weak filter
- * \param src            pointer to picture data
- * \param offset         offset value for picture data
- * \param tc             tc value
- * \param partPNoFilter  indicator to disable filtering on partP
- * \param partQNoFilter  indicator to disable filtering on partQ
- * \param filterSecondP  decision weak filter/no filter for partP
- * \param filterSecondQ  decision weak filter/no filter for partQ */
-static inline void pelFilterLumaStrong(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
+ * \param src     pointer to picture data
+ * \param offset  offset value for picture data
+ * \param tc      tc value
+ * \param maskP   indicator to enable filtering on partP
+ * \param maskQ   indicator to enable filtering on partQ
+ * \param maskP1  mask to enable weak filtering of the second sample on partP
+ * \param maskQ1  mask to enable weak filtering of the second sample on partQ */
+static inline void pelFilterLumaStrong(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ)
 {
+    int32_t tc2 = 2 * tc;
+    int32_t tcP = (tc2 & maskP);
+    int32_t tcQ = (tc2 & maskQ);
     for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
     {
         int16_t m4  = (int16_t)src[0];
         int16_t m3  = (int16_t)src[-offset];
         int16_t m5  = (int16_t)src[offset];
         int16_t m2  = (int16_t)src[-offset * 2];
-        int32_t tc2 = 2 * tc;
-        if (!partPNoFilter)
-        {
-            int16_t m1  = (int16_t)src[-offset * 3];
-            int16_t m0  = (int16_t)src[-offset * 4];
-            src[-offset * 3] = (pixel)(Clip3(-tc2, tc2, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1);
-            src[-offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2);
-            src[-offset]     = (pixel)(Clip3(-tc2, tc2, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3);
-        }
-        if (!partQNoFilter)
-        {
-            int16_t m6  = (int16_t)src[offset * 2];
-            int16_t m7  = (int16_t)src[offset * 3];
-            src[0]           = (pixel)(Clip3(-tc2, tc2, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4);
-            src[offset]      = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5);
-            src[offset * 2]  = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
-        }
+        int16_t m6  = (int16_t)src[offset * 2];
+        int16_t m1  = (int16_t)src[-offset * 3];
+        int16_t m7  = (int16_t)src[offset * 3];
+        int16_t m0  = (int16_t)src[-offset * 4];
+        src[-offset * 3] = (pixel)(Clip3(-tcP, tcP, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1);
+        src[-offset * 2] = (pixel)(Clip3(-tcP, tcP, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2);
+        src[-offset]     = (pixel)(Clip3(-tcP, tcP, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3);
+        src[0]           = (pixel)(Clip3(-tcQ, tcQ, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4);
+        src[offset]      = (pixel)(Clip3(-tcQ, tcQ, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5);
+        src[offset * 2]  = (pixel)(Clip3(-tcQ, tcQ, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
     }
 }
 
 /* Weak filter */
-static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter,
-                                 bool filterSecondP, bool filterSecondQ)
+static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ,
+                                 int32_t maskP1, int32_t maskQ1)
 {
     int32_t thrCut = tc * 10;
+    int32_t tc2 = tc >> 1;
+    maskP1 &= maskP;
+    maskQ1 &= maskQ;
 
     for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
     {
@@ -394,38 +328,31 @@
         {
             delta = Clip3(-tc, tc, delta);
 
-            int32_t tc2 = tc >> 1;
-            if (!partPNoFilter)
+            src[-offset] = Clip(m3 + (delta & maskP));
+            src[0] = Clip(m4 - (delta & maskQ));
+            if (maskP1)
             {
-                src[-offset] = Clip(m3 + delta);
-                if (filterSecondP)
-                {
-                    int16_t m1  = (int16_t)src[-offset * 3];
-                    int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
-                    src[-offset * 2] = Clip(m2 + delta1);
-                }
+                int16_t m1  = (int16_t)src[-offset * 3];
+                int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
+                src[-offset * 2] = Clip(m2 + delta1);
             }
-            if (!partQNoFilter)
+            if (maskQ1)
             {
-                src[0] = Clip(m4 - delta);
-                if (filterSecondQ)
-                {
-                    int16_t m6  = (int16_t)src[offset * 2];
-                    int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
-                    src[offset] = Clip(m5 + delta2);
-                }
+                int16_t m6  = (int16_t)src[offset * 2];
+                int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
+                src[offset] = Clip(m5 + delta2);
             }
         }
     }
 }
 
 /* Deblocking of one line/column for the chrominance component
- * \param src            pointer to picture data
- * \param offset         offset value for picture data
- * \param tc             tc value
- * \param partPNoFilter  indicator to disable filtering on partP
- * \param partQNoFilter  indicator to disable filtering on partQ */
-static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
+ * \param src     pointer to picture data
+ * \param offset  offset value for picture data
+ * \param tc      tc value
+ * \param maskP   mask to enable filtering on partP (-1: filter, 0: bypass)
+ * \param maskQ   mask to enable filtering on partQ (-1: filter, 0: bypass) */
+static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ)
 {
     for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
     {
@@ -435,31 +362,25 @@
         int16_t m2  = (int16_t)src[-offset * 2];
 
         int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
-        if (!partPNoFilter)
-            src[-offset] = Clip(m3 + delta);
-        if (!partQNoFilter)
-            src[0] = Clip(m4 - delta);
+        src[-offset] = Clip(m3 + (delta & maskP));
+        src[0] = Clip(m4 - (delta & maskQ));
     }
 }
 
-void Deblock::edgeFilterLuma(CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[])
+void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
 {
-    PicYuv* reconYuv = cu->m_encData->m_reconPic;
-    pixel* src = reconYuv->getLumaAddr(cu->m_cuAddr, absPartIdx);
-
-    intptr_t stride = reconYuv->m_stride;
-    uint32_t numParts = cu->m_slice->m_sps->numPartInCUSize >> depth;
+    PicYuv* reconPic = cuQ->m_encData->m_reconPic;
+    pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
+    intptr_t stride = reconPic->m_stride;
+    const PPS* pps = cuQ->m_slice->m_pps;
 
     intptr_t offset, srcStep;
 
-    bool  partPNoFilter = false;
-    bool  partQNoFilter = false;
-    uint32_t  partP = 0;
-    uint32_t  partQ = 0;
-    const CUData* cuP = cu;
-    const CUData* cuQ = cu;
-    int32_t betaOffset = cuQ->m_slice->m_pps->deblockingFilterBetaOffsetDiv2 << 1;
-    int32_t tcOffset = cuQ->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
+    int32_t maskP = -1;
+    int32_t maskQ = -1;
+    int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1;
+    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
+    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
 
     if (dir == EDGE_VER)
     {
@@ -474,106 +395,103 @@
         src += (edge << LOG2_UNIT_SIZE) * stride;
     }
 
-    for (uint32_t idx = 0; idx < numParts; idx++)
+    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth;
+    for (uint32_t idx = 0; idx < numUnits; idx++)
     {
-        uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
-        uint32_t bsAbsIdx = calcBsIdx(cu, absPartIdx, dir, edge, idx);
-        uint32_t bs = blockingStrength[bsAbsIdx];
-        if (bs)
+        uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx);
+        uint32_t bs = blockStrength[partQ];
+
+        if (!bs)
+            continue;
+
+        int32_t qpQ = cuQ->m_qp[partQ];
+
+        // Derive neighboring PU index
+        uint32_t partP;
+        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
+
+        int32_t qpP = cuP->m_qp[partP];
+        int32_t qp = (qpP + qpQ + 1) >> 1;
+
+        int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset);
+
+        const int32_t bitdepthShift = X265_DEPTH - 8;
+        int32_t beta = s_betaTable[indexB] << bitdepthShift;
+
+        uint32_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
+        int32_t dp0 = calcDP(src + unitOffset              , offset);
+        int32_t dq0 = calcDQ(src + unitOffset              , offset);
+        int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset);
+        int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset);
+        int32_t d0 = dp0 + dq0;
+        int32_t d3 = dp3 + dq3;
+
+        int32_t d =  d0 + d3;
+
+        if (d >= beta)
+            continue;
+
+        if (bCheckNoFilter)
         {
-            int32_t qpQ = cu->m_qp[bsAbsIdx];
-            partQ = bsAbsIdx;
+            // check if each of PUs is lossless coded
+            maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
+            maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
+        }
 
-            // Derive neighboring PU index
-            if (dir == EDGE_VER)
-                cuP = cuQ->getPULeft(partP, partQ);
-            else // (dir == EDGE_HOR)
-                cuP = cuQ->getPUAbove(partP, partQ);
+        int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
+        int32_t tc = s_tcTable[indexTC] << bitdepthShift;
 
-            int32_t qpP = cuP->m_qp[partP];
-            int32_t qp = (qpP + qpQ + 1) >> 1;
+        bool sw = (2 * d0 < (beta >> 2) &&
+                   2 * d3 < (beta >> 2) &&
+                   useStrongFiltering(offset, beta, tc, src + unitOffset              ) &&
+                   useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3));
 
-            int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset);
+        if (sw)
+            pelFilterLumaStrong(src + unitOffset, srcStep, offset, tc, maskP, maskQ);
+        else
+        {
+            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
+            int32_t dp = dp0 + dp3;
+            int32_t dq = dq0 + dq3;
+            int32_t maskP1 = (dp < sideThreshold ? -1 : 0);
+            int32_t maskQ1 = (dq < sideThreshold ? -1 : 0);
 
-            const int32_t bitdepthShift = X265_DEPTH - 8;
-            int32_t beta = s_betaTable[indexB] << bitdepthShift;
-
-            int32_t dp0 = calcDP(src + srcStep * (unitOffset + 0), offset);
-            int32_t dq0 = calcDQ(src + srcStep * (unitOffset + 0), offset);
-            int32_t dp3 = calcDP(src + srcStep * (unitOffset + 3), offset);
-            int32_t dq3 = calcDQ(src + srcStep * (unitOffset + 3), offset);
-            int32_t d0 = dp0 + dq0;
-            int32_t d3 = dp3 + dq3;
-
-            int32_t d =  d0 + d3;
-
-            if (d < beta)
-            {
-                if (cu->m_slice->m_pps->bTransquantBypassEnabled)
-                {
-                    // check if each of PUs is lossless coded
-                    partPNoFilter = !!cuP->m_tqBypass[partP];
-                    partQNoFilter = !!cuQ->m_tqBypass[partQ];
-                }
-
-                int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
-                int32_t tc = s_tcTable[indexTC] << bitdepthShift;
-
-                bool sw = (2 * d0 < (beta >> 2) &&
-                           2 * d3 < (beta >> 2) &&
-                           useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 0)) &&
-                           useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 3)));
-
-                if (sw)
-                    pelFilterLumaStrong(src + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter);
-                else
-                {
-                    int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
-                    int32_t dp = dp0 + dp3;
-                    int32_t dq = dq0 + dq3;
-                    bool filterP = (dp < sideThreshold);
-                    bool filterQ = (dq < sideThreshold);
-
-                    pelFilterLuma(src + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter, filterP, filterQ);
-                }
-            }
+            pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1);
         }
     }
 }
 
-void Deblock::edgeFilterChroma(CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[])
+void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
 {
-    int32_t chFmt = cu->m_chromaFormat, chromaShift;
+    int32_t chFmt = cuQ->m_chromaFormat, chromaShift;
     intptr_t offset, srcStep;
+    const PPS* pps = cuQ->m_slice->m_pps;
 
-    bool partPNoFilter = false;
-    bool partQNoFilter = false;
-    uint32_t partP;
-    uint32_t partQ;
-    const CUData* cuP;
-    const CUData* cuQ = cu;
-    int32_t tcOffset = cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
+    int32_t maskP = -1;
+    int32_t maskQ = -1;
+    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
 
     X265_CHECK(((dir == EDGE_VER)
-                ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cu->m_hChromaShift)
-                : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cu->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
+                ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift)
+                : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
                "invalid edge\n");
 
-    PicYuv* reconPic = cu->m_encData->m_reconPic;
+    PicYuv* reconPic = cuQ->m_encData->m_reconPic;
     intptr_t stride = reconPic->m_strideC;
-    intptr_t srcOffset = reconPic->getChromaAddrOffset(cu->m_cuAddr, absPartIdx);
+    intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx);
+    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
 
     if (dir == EDGE_VER)
     {
-        chromaShift = cu->m_vChromaShift;
-        srcOffset += (edge << (LOG2_UNIT_SIZE - cu->m_hChromaShift));
+        chromaShift = cuQ->m_vChromaShift;
+        srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift));
         offset     = 1;
         srcStep    = stride;
     }
     else // (dir == EDGE_HOR)
     {
-        chromaShift = cu->m_hChromaShift;
-        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cu->m_vChromaShift);
+        chromaShift = cuQ->m_hChromaShift;
+        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift);
         offset     = stride;
         srcStep    = 1;
     }
@@ -582,53 +500,50 @@
     srcChroma[0] = reconPic->m_picOrg[1] + srcOffset;
     srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
 
-    uint32_t numUnits = cu->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
+    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
 
     for (uint32_t idx = 0; idx < numUnits; idx++)
     {
-        uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
-        uint32_t bsAbsIdx = calcBsIdx(cu, absPartIdx, dir, edge, idx << chromaShift);
-        uint32_t bs = blockingStrength[bsAbsIdx];
+        uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx << chromaShift);
+        uint32_t bs = blockStrength[partQ];
 
-        if (bs > 1)
+        if (bs <= 1)
+            continue;
+
+        int32_t qpQ = cuQ->m_qp[partQ];
+
+        // Derive neighboring PU index
+        uint32_t partP;
+        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
+
+        int32_t qpP = cuP->m_qp[partP];
+
+        if (bCheckNoFilter)
         {
-            int32_t qpQ = cu->m_qp[bsAbsIdx];
-            partQ = bsAbsIdx;
+            // check if each of PUs is lossless coded
+            maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
+            maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
+        }
 
-            // Derive neighboring PU index
-            if (dir == EDGE_VER)
-                cuP = cuQ->getPULeft(partP, partQ);
-            else // (dir == EDGE_HOR)
-                cuP = cuQ->getPUAbove(partP, partQ);
-
-            int32_t qpP = cuP->m_qp[partP];
-
-            if (cu->m_slice->m_pps->bTransquantBypassEnabled)
+        uint32_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
+        for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
+        {
+            int32_t chromaQPOffset  = pps->chromaQpOffset[chromaIdx];
+            int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
+            if (qp >= 30)
             {
-                // check if each of PUs is lossless coded
-                partPNoFilter = !!cuP->m_tqBypass[partP];
-                partQNoFilter = !!cuQ->m_tqBypass[partQ];
+                if (chFmt == X265_CSP_I420)
+                    qp = g_chromaScale[qp];
+                else
+                    qp = X265_MIN(qp, 51);
             }
 
-            for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
-            {
-                int32_t chromaQPOffset  = !chromaIdx ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset;
-                int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
-                if (qp >= 30)
-                {
-                    if (chFmt == X265_CSP_I420)
-                        qp = g_chromaScale[qp];
-                    else
-                        qp = X265_MIN(qp, 51);
-                }
+            int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
+            const int32_t bitdepthShift = X265_DEPTH - 8;
+            int32_t tc = s_tcTable[indexTC] << bitdepthShift;
+            pixel* srcC = srcChroma[chromaIdx];
 
-                int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
-                const int32_t bitdepthShift = X265_DEPTH - 8;
-                int32_t tc = s_tcTable[indexTC] << bitdepthShift;
-                pixel* srcC = srcChroma[chromaIdx];
-
-                pelFilterChroma(srcC + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter);
-            }
+            pelFilterChroma(srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
         }
     }
 }
diff -r 2a8f3d5820a6 -r ce18e3c8e9af source/common/deblock.h
--- a/source/common/deblock.h	Tue Nov 04 09:46:14 2014 +0530
+++ b/source/common/deblock.h	Wed Nov 05 18:07:30 2014 +0900
@@ -42,31 +42,24 @@
 
     void init() { m_numPartitions = 1 << (g_maxFullDepth * 2); }
 
-    void deblockCTU(CUData* cu, int32_t dir);
+    void deblockCTU(const CUData* ctu, int32_t dir);
 
 protected:
 
     // CU-level deblocking function
-    void deblockCU(CUData* cu, uint32_t absZOrderIdx, uint32_t depth, const int32_t Edge, uint8_t blockingStrength[]);
-
-    struct Param
-    {
-        uint8_t leftEdge;
-        uint8_t topEdge;
-    };
+    void deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[]);
 
     // set filtering functions
-    void setLoopfilterParam(CUData* cu, uint32_t absZOrderIdx, Param *params);
-    void setEdgefilterTU(CUData* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, uint8_t blockingStrength[]);
-    void setEdgefilterPU(CUData* cu, uint32_t absZOrderIdx, int32_t dir, uint8_t blockingStrength[], uint32_t widthInBaseUnits);
-    void setEdgefilterMultiple(CUData* cu, uint32_t absZOrderIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockingStrength[], uint32_t widthInBaseUnits);
+    void setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockStrength[]);
+    void setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits);
+    void setEdgefilterMultiple(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits);
 
     // get filtering functions
-    void getBoundaryStrengthSingle(CUData* cu, int32_t dir, uint32_t partIdx, uint8_t blockingStrength[]);
+    uint8_t getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[]);
 
     // filter luma/chroma functions
-    void edgeFilterLuma(CUData* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[]);
-    void edgeFilterChroma(CUData* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[]);
+    void edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]);
+    void edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]);
 
     static const uint8_t s_tcTable[54];
     static const uint8_t s_betaTable[52];
diff -r 2a8f3d5820a6 -r ce18e3c8e9af source/common/quant.cpp
--- a/source/common/quant.cpp	Tue Nov 04 09:46:14 2014 +0530
+++ b/source/common/quant.cpp	Wed Nov 05 18:07:30 2014 +0900
@@ -195,8 +195,8 @@
     m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] : NULL;
     int qpy = ctu.m_qp[0];
     m_qpParam[TEXT_LUMA].setQpParam(qpy + QP_BD_OFFSET);
-    setChromaQP(qpy + ctu.m_slice->m_pps->chromaCbQpOffset, TEXT_CHROMA_U, ctu.m_chromaFormat);
-    setChromaQP(qpy + ctu.m_slice->m_pps->chromaCrQpOffset, TEXT_CHROMA_V, ctu.m_chromaFormat);
+    setChromaQP(qpy + ctu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, ctu.m_chromaFormat);
+    setChromaQP(qpy + ctu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, ctu.m_chromaFormat);
 }
 
 void Quant::setChromaQP(int qpin, TextType ttype, int chFmt)
diff -r 2a8f3d5820a6 -r ce18e3c8e9af source/common/slice.h
--- a/source/common/slice.h	Tue Nov 04 09:46:14 2014 +0530
+++ b/source/common/slice.h	Wed Nov 05 18:07:30 2014 +0900
@@ -242,8 +242,7 @@
 {
     uint32_t maxCuDQPDepth;
 
-    int      chromaCbQpOffset;       // use param
-    int      chromaCrQpOffset;       // use param
+    int      chromaQpOffset[2];      // use param
 
     bool     bUseWeightPred;         // use param
     bool     bUseWeightedBiPred;     // use param
@@ -334,6 +333,8 @@
 
     void setRefPicList(PicList& picList);
 
+    const Frame* getRefPic(int list, int refIdx) const { return refIdx >= 0 ? m_refPicList[list][refIdx] : NULL; } // m_refPicList[list][refIdx], or NULL when refIdx is negative; no upper-bound check on refIdx
+
     bool getRapPicFlag() const
     {
         return m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
diff -r 2a8f3d5820a6 -r ce18e3c8e9af source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Tue Nov 04 09:46:14 2014 +0530
+++ b/source/encoder/encoder.cpp	Wed Nov 05 18:07:30 2014 +0900
@@ -1280,8 +1280,8 @@
         pps->maxCuDQPDepth = 0;
     }
 
-    pps->chromaCbQpOffset = m_param->cbQpOffset;
-    pps->chromaCrQpOffset = m_param->crQpOffset;
+    pps->chromaQpOffset[0] = m_param->cbQpOffset;
+    pps->chromaQpOffset[1] = m_param->crQpOffset;
 
     pps->bConstrainedIntraPred = m_param->bEnableConstrainedIntra;
     pps->bUseWeightPred = m_param->bEnableWeightedPred;
diff -r 2a8f3d5820a6 -r ce18e3c8e9af source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Tue Nov 04 09:46:14 2014 +0530
+++ b/source/encoder/entropy.cpp	Wed Nov 05 18:07:30 2014 +0900
@@ -154,8 +154,8 @@
     if (pps.bUseDQP)
         WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
 
-    WRITE_SVLC(pps.chromaCbQpOffset,       "pps_cb_qp_offset");
-    WRITE_SVLC(pps.chromaCrQpOffset,       "pps_cr_qp_offset");
+    WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
+    WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
     WRITE_FLAG(0,                          "pps_slice_chroma_qp_offsets_present_flag");
 
     WRITE_FLAG(pps.bUseWeightPred,            "weighted_pred_flag");
diff -r 2a8f3d5820a6 -r ce18e3c8e9af source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Tue Nov 04 09:46:14 2014 +0530
+++ b/source/encoder/framefilter.cpp	Wed Nov 05 18:07:30 2014 +0900
@@ -100,19 +100,19 @@
         for (uint32_t col = 0; col < numCols; col++)
         {
             uint32_t cuAddr = lineStartCUAddr + col;
-            CUData* cu = encData.getPicCTU(cuAddr);
+            const CUData* ctu = encData.getPicCTU(cuAddr);
 
-            m_deblock.deblockCTU(cu, Deblock::EDGE_VER);
+            m_deblock.deblockCTU(ctu, Deblock::EDGE_VER);
 
             if (col > 0)
             {
-                CUData* cuPrev = encData.getPicCTU(cuAddr - 1);
-                m_deblock.deblockCTU(cuPrev, Deblock::EDGE_HOR);
+                const CUData* ctuPrev = encData.getPicCTU(cuAddr - 1);
+                m_deblock.deblockCTU(ctuPrev, Deblock::EDGE_HOR);
             }
         }
 
-        CUData* cuPrev = encData.getPicCTU(lineStartCUAddr + numCols - 1);
-        m_deblock.deblockCTU(cuPrev, Deblock::EDGE_HOR);
+        const CUData* ctuPrev = encData.getPicCTU(lineStartCUAddr + numCols - 1);
+        m_deblock.deblockCTU(ctuPrev, Deblock::EDGE_HOR);
     }
 
     // SAO
diff -r 2a8f3d5820a6 -r ce18e3c8e9af source/encoder/rdcost.h
--- a/source/encoder/rdcost.h	Tue Nov 04 09:46:14 2014 +0530
+++ b/source/encoder/rdcost.h	Wed Nov 05 18:07:30 2014 +0900
@@ -52,12 +52,12 @@
 
         setLambda(x265_lambda2_tab[qp], x265_lambda_tab[qp]);
 
-        int qpCb = Clip3(QP_MIN, QP_MAX_MAX, qp + slice.m_pps->chromaCbQpOffset);
+        int qpCb = Clip3(QP_MIN, QP_MAX_MAX, qp + slice.m_pps->chromaQpOffset[0]);
         int chroma_offset_idx = X265_MIN(qp - qpCb + 12, MAX_CHROMA_LAMBDA_OFFSET);
         uint16_t lambdaOffset = m_psyRd ? x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
         setCbDistortionWeight(lambdaOffset);
 
-        int qpCr = Clip3(QP_MIN, QP_MAX_MAX, qp + slice.m_pps->chromaCrQpOffset);
+        int qpCr = Clip3(QP_MIN, QP_MAX_MAX, qp + slice.m_pps->chromaQpOffset[1]);
         chroma_offset_idx = X265_MIN(qp - qpCr + 12, MAX_CHROMA_LAMBDA_OFFSET);
         lambdaOffset = m_psyRd ? x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
         setCrDistortionWeight(lambdaOffset);
diff -r 2a8f3d5820a6 -r ce18e3c8e9af source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Tue Nov 04 09:46:14 2014 +0530
+++ b/source/encoder/sao.cpp	Wed Nov 05 18:07:30 2014 +0900
@@ -177,7 +177,7 @@
 {
     Slice* slice = frame->m_encData->m_slice;
 
-    int qpCb = Clip3(0, QP_MAX_MAX, qp + slice->m_pps->chromaCbQpOffset);
+    int qpCb = Clip3(0, QP_MAX_MAX, qp + slice->m_pps->chromaQpOffset[0]);
     m_lumaLambda = x265_lambda2_tab[qp];
     m_chromaLambda = x265_lambda2_tab[qpCb]; // Use Cb QP for SAO chroma
     m_frame = frame;



More information about the x265-devel mailing list