[x265] refine deblocking filter

Satoshi Nakagawa nakagawa424 at oki.com
Fri Sep 26 12:36:19 CEST 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1411727676 -32400
#      Fri Sep 26 19:34:36 2014 +0900
# Node ID 06237deb460b629d6100d5b613d42033cc3477bd
# Parent  7dccbbed034970de161b361cd6e17ed4efca7226
refine deblocking filter

diff -r 7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Fri Sep 26 19:34:36 2014 +0900
@@ -155,6 +155,8 @@
 
     pixel*  getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
 
+    int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
+
     uint32_t getCUHeight(int rowNum);
 
     void  copyFromPicture(const x265_picture&, int padx, int pady);
diff -r 7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.cpp	Fri Sep 26 19:34:36 2014 +0900
@@ -115,10 +115,10 @@
 uint32_t g_maxCUSize     = MAX_CU_SIZE;
 uint32_t g_maxFullDepth  = NUM_FULL_DEPTH - 1;
 uint32_t g_maxCUDepth    = NUM_CU_DEPTH - 1;
-uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
-uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
+uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] = { 0, };
+uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] = { 0, };
 
-const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W] =
+const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS] =
 {
     0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
     16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
@@ -138,7 +138,7 @@
     48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60
 };
 
-const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W] =
+const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS] =
 {
     0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
     0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
diff -r 7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.h	Fri Sep 26 19:34:36 2014 +0900
@@ -54,6 +54,8 @@
 #define UNIT_SIZE               (1 << LOG2_UNIT_SIZE)       // unit size of CU partition
 #define TMVP_UNIT_MASK          0xF0                        // mask for mapping index to CompressMV field
 
+#define MAX_NUM_PARTITIONS      256
+
 #define MIN_PU_SIZE             4
 #define MIN_TU_SIZE             4
 #define MAX_NUM_SPU_W           (MAX_CU_SIZE / MIN_PU_SIZE) // maximum number of SPU in horizontal line
@@ -75,15 +77,15 @@
 extern const uint8_t g_chroma422IntraAngleMappingTable[36];
 
 // flexible conversion from relative to absolute index
-extern uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
-extern uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
+extern uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS];
+extern uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS];
 
 void initZscanToRaster(uint32_t maxFullDepth, uint32_t depth, uint32_t startVal, uint32_t*& curIdx);
 void initRasterToZscan(uint32_t maxFullDepth);
 
 // conversion of partition index to picture pel position
-extern const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
-extern const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
+extern const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS];
+extern const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS];
 
 // global variable (LCU width/height, max. CU depth)
 extern uint32_t g_maxLog2CUSize;
diff -r 7dccbbed0349 -r 06237deb460b source/common/deblock.cpp
--- a/source/common/deblock.cpp	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/common/deblock.cpp	Fri Sep 26 19:34:36 2014 +0900
@@ -32,23 +32,24 @@
 #define DEBLOCK_SMALLEST_BLOCK  8
 #define DEFAULT_INTRA_TC_OFFSET 2
 
-void Deblock::deblockCTU(TComDataCU* cu, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::deblockCTU(TComDataCU* cu, int32_t dir)
 {
+    uint8_t blockingStrength[MAX_NUM_PARTITIONS];
+
     memset(blockingStrength, 0, sizeof(uint8_t) * m_numPartitions);
-    memset(edgeFilter, 0, sizeof(bool) * m_numPartitions);
 
-    deblockCU(cu, 0, 0, dir, edgeFilter, blockingStrength);
+    deblockCU(cu, 0, 0, dir, blockingStrength);
 }
 
 /* Deblocking filter process in CU-based (the same function as conventional's)
  * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
-void Deblock::deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, const int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, const int32_t dir, uint8_t blockingStrength[])
 {
     if (!cu->m_pic || cu->getPartitionSize(absZOrderIdx) == SIZE_NONE)
         return;
 
     Frame* pic = cu->m_pic;
-    uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1);
+    uint32_t curNumParts = m_numPartitions >> (depth * 2);
 
     if (cu->getDepth(absZOrderIdx) > depth)
     {
@@ -56,35 +57,36 @@
         uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples  - cu->getCUPelX();
         uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->getCUPelY();
         for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
-        {
             if (g_zscanToPelX[absZOrderIdx] < xmax && g_zscanToPelY[absZOrderIdx] < ymax)
-                deblockCU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
-        }
+                deblockCU(cu, absZOrderIdx, depth + 1, dir, blockingStrength);
         return;
     }
 
+    const uint32_t widthInBaseUnits  = cu->m_pic->getNumPartInCUSize() >> depth;
     Param params;
     setLoopfilterParam(cu, absZOrderIdx, &params);
-    setEdgefilterTU(cu, absZOrderIdx, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
-    setEdgefilterPU(cu, absZOrderIdx, dir, &params, edgeFilter, blockingStrength);
+    setEdgefilterPU(cu, absZOrderIdx, dir, blockingStrength, widthInBaseUnits);
+    setEdgefilterTU(cu, absZOrderIdx, depth, dir, blockingStrength);
+    setEdgefilterMultiple(cu, absZOrderIdx, dir, 0, (dir == EDGE_VER ? params.leftEdge : params.topEdge), blockingStrength, widthInBaseUnits);
 
     for (uint32_t partIdx = absZOrderIdx; partIdx < absZOrderIdx + curNumParts; partIdx++)
     {
-        uint32_t bsCheck = (dir == EDGE_VER ? !(partIdx & 1) : !(partIdx & 2));
+        uint32_t bsCheck = !(partIdx & (1 << dir));
 
-        if (edgeFilter[partIdx] && bsCheck)
+        if (bsCheck && blockingStrength[partIdx])
             getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength);
     }
 
-    uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
+    const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
     uint32_t sizeInPU = pic->getNumPartInCUSize() >> depth;
     uint32_t shiftFactor = (dir == EDGE_VER) ? cu->getHorzChromaShift() : cu->getVertChromaShift();
-    const bool alwaysDoChroma = cu->getChromaFormat() == X265_CSP_I444;
-
+    uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
+    uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absZOrderIdx] : g_zscanToPelY[absZOrderIdx]) >> LOG2_UNIT_SIZE;
+        
     for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
     {
         edgeFilterLuma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
-        if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE)))
+        if (!((e0 + e) & chromaMask))
             edgeFilterChroma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
     }
 }
@@ -99,82 +101,67 @@
         return g_rasterToZscan[g_zscanToRaster[absZOrderIdx] + baseUnitIdx * lcuWidthInBaseUnits + edgeIdx];
 }
 
-void Deblock::setEdgefilterMultiple(TComDataCU* cu, uint32_t scanIdx, uint32_t depth, int32_t dir, int32_t edgeIdx, bool value, bool edgeFilter[], uint8_t blockingStrength[], uint32_t widthInBaseUnits)
+void Deblock::setEdgefilterMultiple(TComDataCU* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockingStrength[], uint32_t widthInBaseUnits)
 {
-    if (!widthInBaseUnits)
-        widthInBaseUnits = cu->m_pic->getNumPartInCUSize() >> depth;
-
     const uint32_t numElem = widthInBaseUnits;
     X265_CHECK(numElem > 0, "numElem edge filter check\n");
     for (uint32_t i = 0; i < numElem; i++)
     {
         const uint32_t bsidx = calcBsIdx(cu, scanIdx, dir, edgeIdx, i);
-        edgeFilter[bsidx] = value;
-        if (!edgeIdx)
-            blockingStrength[bsidx] = value;
+        blockingStrength[bsidx] = value;
     }
 }
 
-void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, uint8_t blockingStrength[])
 {
     if (cu->getTransformIdx(absZOrderIdx) + cu->getDepth(absZOrderIdx) > (uint8_t)depth)
     {
-        const uint32_t curNumParts = cu->m_pic->getNumPartInCU() >> (depth << 1);
+        const uint32_t curNumParts = m_numPartitions >> (depth * 2);
         const uint32_t qNumParts   = curNumParts >> 2;
 
         for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
-        {
-            uint32_t nsAddr = absZOrderIdx;
-            setEdgefilterTU(cu, nsAddr, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
-        }
+            setEdgefilterTU(cu, absZOrderIdx, depth + 1, dir, blockingStrength);
         return;
     }
 
     uint32_t widthInBaseUnits  = 1 << (cu->getLog2CUSize(absZOrderIdx) - cu->getTransformIdx(absZOrderIdx) - LOG2_UNIT_SIZE);
-    setEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
+    setEdgefilterMultiple(cu, absZOrderIdx, dir, 0, 2, blockingStrength, widthInBaseUnits);
 }
 
-void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, uint8_t blockingStrength[], uint32_t widthInBaseUnits)
 {
-    const uint32_t depth = cu->getDepth(absZOrderIdx);
-    const uint32_t widthInBaseUnits  = cu->m_pic->getNumPartInCUSize() >> depth;
-    const uint32_t hWidthInBaseUnits  = widthInBaseUnits  >> 1;
-    const uint32_t qWidthInBaseUnits  = widthInBaseUnits  >> 2;
+    const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
+    const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
 
-    setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, (dir == EDGE_VER ? params->leftEdge : params->topEdge), edgeFilter, blockingStrength);
-
-    int32_t mode = cu->getPartitionSize(absZOrderIdx);
-    switch (mode)
+    switch (cu->getPartitionSize(absZOrderIdx))
     {
     case SIZE_2NxN:
+        if (EDGE_HOR == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+        break;
     case SIZE_Nx2N:
-        {
-            const int32_t realDir = (mode == SIZE_2NxN ? EDGE_HOR : EDGE_VER);
-            if (realDir == dir)
-                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
-            break;
-        }
+        if (EDGE_VER == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+        break;
     case SIZE_NxN:
-        {
-            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
-            break;
-        }
+        setEdgefilterMultiple(cu, absZOrderIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+        break;
     case SIZE_2NxnU:
+        if (EDGE_HOR == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, dir, qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+        break;
     case SIZE_nLx2N:
-        {
-            const int32_t realDir = (mode == SIZE_2NxnU ? EDGE_HOR : EDGE_VER);
-            if (realDir == dir)
-                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
-            break;
-        }
+        if (EDGE_VER == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, dir, qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+        break;
     case SIZE_2NxnD:
+        if (EDGE_HOR == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, dir, widthInBaseUnits - qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+        break;
     case SIZE_nRx2N:
-        {
-            const int32_t realDir = (mode == SIZE_2NxnD ? EDGE_HOR : EDGE_VER);
-            if (realDir == dir)
-                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
-            break;
-        }
+        if (EDGE_VER == dir)
+            setEdgefilterMultiple(cu, absZOrderIdx, dir, widthInBaseUnits - qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+        break;
 
     case SIZE_2Nx2N:
     default:
@@ -191,25 +178,25 @@
     uint32_t    tempPartIdx;
 
     if (!x)
-        params->leftEdge = false;
+        params->leftEdge = 0;
     else
     {
         tempCU = cu->getPULeft(tempPartIdx, absZOrderIdx);
         if (tempCU)
-            params->leftEdge = true;
+            params->leftEdge = 2;
         else
-            params->leftEdge = false;
+            params->leftEdge = 0;
     }
 
     if (!y)
-        params->topEdge = false;
+        params->topEdge = 0;
     else
     {
         tempCU = cu->getPUAbove(tempPartIdx, absZOrderIdx);
         if (tempCU)
-            params->topEdge = true;
+            params->topEdge = 2;
         else
-            params->topEdge = false;
+            params->topEdge = 0;
     }
 }
 
@@ -239,8 +226,9 @@
         uint32_t nsPartQ = partQ;
         uint32_t nsPartP = partP;
 
-        if (blockingStrength[absPartIdx] && (cuQ->getCbf(nsPartQ, TEXT_LUMA, cuQ->getTransformIdx(nsPartQ)) ||
-            cuP->getCbf(nsPartP, TEXT_LUMA, cuP->getTransformIdx(nsPartP))))
+        if (blockingStrength[absPartIdx] > 1 &&
+            (cuQ->getCbf(nsPartQ, TEXT_LUMA, cuQ->getTransformIdx(nsPartQ)) ||
+             cuP->getCbf(nsPartP, TEXT_LUMA, cuP->getTransformIdx(nsPartP))))
             bs = 1;
         else
         {
@@ -338,7 +326,7 @@
     return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]);
 }
 
-static inline bool useStrongFiltering(int32_t offset, int32_t d, int32_t beta, int32_t tc, pixel* src)
+static inline bool useStrongFiltering(int32_t offset, int32_t beta, int32_t tc, pixel* src)
 {
     int16_t m4     = (int16_t)src[0];
     int16_t m3     = (int16_t)src[-offset];
@@ -346,79 +334,87 @@
     int16_t m0     = (int16_t)src[-offset * 4];
     int32_t strong = abs(m0 - m3) + abs(m7 - m4);
 
-    return (strong < (beta >> 3)) && (d < (beta >> 2)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
+    return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
 }
 
 /* Deblocking for the luminance component with strong or weak filter
  * \param src            pointer to picture data
  * \param offset         offset value for picture data
  * \param tc             tc value
- * \param sw             decision strong/weak filter
  * \param partPNoFilter  indicator to disable filtering on partP
  * \param partQNoFilter  indicator to disable filtering on partQ
- * \param thrCut         threshold value for weak filter decision
  * \param filterSecondP  decision weak filter/no filter for partP
  * \param filterSecondQ  decision weak filter/no filter for partQ */
-static inline void pelFilterLuma(pixel* src, int32_t offset, int32_t tc, bool sw, bool partPNoFilter, bool partQNoFilter,
-                                 int32_t thrCut, bool filterSecondP, bool filterSecondQ)
+static inline void pelFilterLumaStrong(pixel* src, int32_t srcStep, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
 {
-    int32_t delta;
+    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
+    {
+        int16_t m4  = (int16_t)src[0];
+        int16_t m3  = (int16_t)src[-offset];
+        int16_t m5  = (int16_t)src[offset];
+        int16_t m2  = (int16_t)src[-offset * 2];
+        int32_t tc2 = 2 * tc;
+        if (!partPNoFilter)
+        {
+            int16_t m1  = (int16_t)src[-offset * 3];
+            int16_t m0  = (int16_t)src[-offset * 4];
+            src[-offset * 3] = (pixel)(Clip3(-tc2, tc2, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1);
+            src[-offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2);
+            src[-offset]     = (pixel)(Clip3(-tc2, tc2, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3);
+        }
+        if (!partQNoFilter)
+        {
+            int16_t m6  = (int16_t)src[offset * 2];
+            int16_t m7  = (int16_t)src[offset * 3];
+            src[0]           = (pixel)(Clip3(-tc2, tc2, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4);
+            src[offset]      = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5);
+            src[offset * 2]  = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
+        }
+    }
+}
 
-    int16_t m4  = (int16_t)src[0];
-    int16_t m3  = (int16_t)src[-offset];
-    int16_t m5  = (int16_t)src[offset];
-    int16_t m2  = (int16_t)src[-offset * 2];
-    int16_t m6  = (int16_t)src[offset * 2];
-    int16_t m1  = (int16_t)src[-offset * 3];
-    int16_t m7  = (int16_t)src[offset * 3];
-    int16_t m0  = (int16_t)src[-offset * 4];
+/* Weak filter */
+static inline void pelFilterLuma(pixel* src, int32_t srcStep, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter,
+                                 bool filterSecondP, bool filterSecondQ)
+{
+    int32_t thrCut = tc * 10;
 
-    if (sw)
+    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
     {
-        src[-offset]     = (pixel)Clip3(m3 - 2 * tc, m3 + 2 * tc, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3));
-        src[0]           = (pixel)Clip3(m4 - 2 * tc, m4 + 2 * tc, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3));
-        src[-offset * 2] = (pixel)Clip3(m2 - 2 * tc, m2 + 2 * tc, ((m1 + m2 + m3 + m4 + 2) >> 2));
-        src[offset]      = (pixel)Clip3(m5 - 2 * tc, m5 + 2 * tc, ((m3 + m4 + m5 + m6 + 2) >> 2));
-        src[-offset * 3] = (pixel)Clip3(m1 - 2 * tc, m1 + 2 * tc, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3));
-        src[offset * 2]  = (pixel)Clip3(m6 - 2 * tc, m6 + 2 * tc, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3));
-    }
-    else
-    {
-        /* Weak filter */
-        delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
+        int16_t m4  = (int16_t)src[0];
+        int16_t m3  = (int16_t)src[-offset];
+        int16_t m5  = (int16_t)src[offset];
+        int16_t m2  = (int16_t)src[-offset * 2];
+
+        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
 
         if (abs(delta) < thrCut)
         {
             delta = Clip3(-tc, tc, delta);
-            src[-offset] = Clip(m3 + delta);
-            src[0] = Clip(m4 - delta);
 
             int32_t tc2 = tc >> 1;
-            if (filterSecondP)
+            if (!partPNoFilter)
             {
-                int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
-                src[-offset * 2] = Clip(m2 + delta1);
+                src[-offset] = Clip(m3 + delta);
+                if (filterSecondP)
+                {
+                    int16_t m1  = (int16_t)src[-offset * 3];
+                    int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
+                    src[-offset * 2] = Clip(m2 + delta1);
+                }
             }
-            if (filterSecondQ)
+            if (!partQNoFilter)
             {
-                int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
-                src[offset] = Clip(m5 + delta2);
+                src[0] = Clip(m4 - delta);
+                if (filterSecondQ)
+                {
+                    int16_t m6  = (int16_t)src[offset * 2];
+                    int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
+                    src[offset] = Clip(m5 + delta2);
+                }
             }
         }
     }
-
-    if (partPNoFilter)
-    {
-        src[-offset] = (pixel)m3;
-        src[-offset * 2] = (pixel)m2;
-        src[-offset * 3] = (pixel)m1;
-    }
-    if (partQNoFilter)
-    {
-        src[0] = (pixel)m4;
-        src[offset] = (pixel)m5;
-        src[offset * 2] = (pixel)m6;
-    }
 }
 
 /* Deblocking of one line/column for the chrominance component
@@ -427,36 +423,31 @@
  * \param tc             tc value
  * \param partPNoFilter  indicator to disable filtering on partP
  * \param partQNoFilter  indicator to disable filtering on partQ */
-static inline void pelFilterChroma(pixel* src, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
+static inline void pelFilterChroma(pixel* src, int32_t srcStep, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
 {
-    int32_t delta;
+    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
+    {
+        int16_t m4  = (int16_t)src[0];
+        int16_t m3  = (int16_t)src[-offset];
+        int16_t m5  = (int16_t)src[offset];
+        int16_t m2  = (int16_t)src[-offset * 2];
 
-    int16_t m4  = (int16_t)src[0];
-    int16_t m3  = (int16_t)src[-offset];
-    int16_t m5  = (int16_t)src[offset];
-    int16_t m2  = (int16_t)src[-offset * 2];
-
-    delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
-    src[-offset] = Clip(m3 + delta);
-    src[0] = Clip(m4 - delta);
-
-    if (partPNoFilter)
-        src[-offset] = (pixel)m3;
-    if (partQNoFilter)
-        src[0] = (pixel)m4;
+        int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
+        if (!partPNoFilter)
+            src[-offset] = Clip(m3 + delta);
+        if (!partQNoFilter)
+            src[0] = Clip(m4 - delta);
+    }
 }
 
-void Deblock::edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
+void Deblock::edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[])
 {
     TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
     pixel* src = reconYuv->getLumaAddr(cu->getAddr(), absZOrderIdx);
-    pixel* tmpsrc = src;
 
     int32_t stride = reconYuv->getStride();
     uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
 
-    uint32_t blocksInPart = (LOG2_UNIT_SIZE - 2) > 0 ? 1 << (LOG2_UNIT_SIZE - 2) : 1;
-    uint32_t bsAbsIdx = 0, bs = 0;
     int32_t offset, srcStep;
 
     bool  partPNoFilter = false;
@@ -472,20 +463,20 @@
     {
         offset = 1;
         srcStep = stride;
-        tmpsrc += (edge << LOG2_UNIT_SIZE);
+        src += (edge << LOG2_UNIT_SIZE);
     }
     else // (dir == EDGE_HOR)
     {
         offset = stride;
         srcStep = 1;
-        tmpsrc += (edge << LOG2_UNIT_SIZE) * stride;
+        src += (edge << LOG2_UNIT_SIZE) * stride;
     }
 
     for (uint32_t idx = 0; idx < numParts; idx++)
     {
-        uint32_t partOffset = idx << LOG2_UNIT_SIZE;
-        bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
-        bs = blockingStrength[bsAbsIdx];
+        uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
+        uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
+        uint32_t bs = blockingStrength[bsAbsIdx];
         if (bs)
         {
             int32_t qpQ = cu->getQP(bsAbsIdx);
@@ -499,29 +490,23 @@
 
             int32_t qpP = cuP->getQP(partP);
             int32_t qp = (qpP + qpQ + 1) >> 1;
-            int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
 
-            int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
             int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset);
 
-            int32_t tc = s_tcTable[indexTC] * bitdepthScale;
-            int32_t beta = s_betaTable[indexB] * bitdepthScale;
-            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
-            int32_t thrCut = tc * 10;
+            const int32_t bitdepthShift = X265_DEPTH - 8;
+            int32_t beta = s_betaTable[indexB] << bitdepthShift;
 
-            for (uint32_t blkIdx = 0; blkIdx < blocksInPart; blkIdx++)
+            int32_t dp0 = calcDP(src + srcStep * (unitOffset + 0), offset);
+            int32_t dq0 = calcDQ(src + srcStep * (unitOffset + 0), offset);
+            int32_t dp3 = calcDP(src + srcStep * (unitOffset + 3), offset);
+            int32_t dq3 = calcDQ(src + srcStep * (unitOffset + 3), offset);
+            int32_t d0 = dp0 + dq0;
+            int32_t d3 = dp3 + dq3;
+
+            int32_t d =  d0 + d3;
+
+            if (d < beta)
             {
-                int32_t dp0 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
-                int32_t dq0 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
-                int32_t dp3 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
-                int32_t dq3 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
-                int32_t d0 = dp0 + dq0;
-                int32_t d3 = dp3 + dq3;
-
-                int32_t dp = dp0 + dp3;
-                int32_t dq = dq0 + dq3;
-                int32_t d =  d0 + d3;
-
                 if (cu->m_slice->m_pps->bTransquantBypassEnabled)
                 {
                     // check if each of PUs is lossless coded
@@ -529,36 +514,35 @@
                     partQNoFilter = cuQ->getCUTransquantBypass(partQ);
                 }
 
-                if (d < beta)
+                int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
+                int32_t tc = s_tcTable[indexTC] << bitdepthShift;
+
+                bool sw = (2 * d0 < (beta >> 2) &&
+                           2 * d3 < (beta >> 2) &&
+                           useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 0)) &&
+                           useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 3)));
+
+                if (sw)
+                    pelFilterLumaStrong(src + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter);
+                else
                 {
+                    int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
+                    int32_t dp = dp0 + dp3;
+                    int32_t dq = dq0 + dq3;
                     bool filterP = (dp < sideThreshold);
                     bool filterQ = (dq < sideThreshold);
 
-                    bool sw = useStrongFiltering(offset, 2 * d0, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0))
-                           && useStrongFiltering(offset, 2 * d3, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3));
-
-                    for (int32_t i = 0; i < DEBLOCK_SMALLEST_BLOCK / 2; i++)
-                        pelFilterLuma(tmpsrc + srcStep * (partOffset + blkIdx * 4 + i), offset, tc, sw, partPNoFilter, partQNoFilter, thrCut, filterP, filterQ);
+                    pelFilterLuma(src + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter, filterP, filterQ);
                 }
             }
         }
     }
 }
 
-void Deblock::edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
+void Deblock::edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[])
 {
     int32_t chFmt = cu->getChromaFormat();
-    TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
-    int32_t stride = reconYuv->getCStride();
-    pixel* srcCb = reconYuv->getCbAddr(cu->getAddr(), absZOrderIdx);
-    pixel* srcCr = reconYuv->getCrAddr(cu->getAddr(), absZOrderIdx);
-    uint32_t log2UnitSizeH = LOG2_UNIT_SIZE - cu->getHorzChromaShift();
-    uint32_t log2UnitSizeV = LOG2_UNIT_SIZE - cu->getVertChromaShift();
-    uint32_t sizeChromaH = 1 << log2UnitSizeH;
-    uint32_t sizeChromaV = 1 << log2UnitSizeV;
-    int32_t offset, srcStep;
-
-    const uint32_t lcuWidthInBaseUnits = cu->m_pic->getNumPartInCUSize();
+    int32_t offset, srcStep, chromaShift;
 
     bool partPNoFilter = false;
     bool partQNoFilter = false;
@@ -568,44 +552,42 @@
     TComDataCU* cuQ = cu;
     int32_t tcOffset = cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
 
-    // Vertical Position
-    uint32_t edgeNumInLCUVert = g_zscanToRaster[absZOrderIdx] % lcuWidthInBaseUnits + edge;
-    uint32_t edgeNumInLCUHor = g_zscanToRaster[absZOrderIdx] / lcuWidthInBaseUnits + edge;
+    X265_CHECK(((dir == EDGE_VER)
+                ? ((g_zscanToPelX[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getHorzChromaShift())
+                : ((g_zscanToPelY[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getVertChromaShift())) % DEBLOCK_SMALLEST_BLOCK == 0,
+               "invalid edge\n");
 
-    if ((sizeChromaH < DEBLOCK_SMALLEST_BLOCK) && (sizeChromaV < DEBLOCK_SMALLEST_BLOCK) &&
-        (((edgeNumInLCUVert % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeH)) && !dir) ||
-         ((edgeNumInLCUHor % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeV)) && dir)))
-        return;
 
-    uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
-    uint32_t bsAbsIdx;
-    uint8_t bs;
-
-    pixel* tmpSrcCb = srcCb;
-    pixel* tmpSrcCr = srcCr;
-    uint32_t loopLength;
+    TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
+    int32_t stride = reconYuv->getCStride();
+    int32_t srcOffset = reconYuv->getChromaAddrOffset(cu->getAddr(), absZOrderIdx);
 
     if (dir == EDGE_VER)
     {
+        chromaShift = cu->getVertChromaShift();
+        srcOffset += (edge << (LOG2_UNIT_SIZE - cu->getHorzChromaShift()));
         offset     = 1;
         srcStep    = stride;
-        tmpSrcCb  += (edge << log2UnitSizeH);
-        tmpSrcCr  += (edge << log2UnitSizeH);
-        loopLength = sizeChromaV;
     }
     else // (dir == EDGE_HOR)
     {
+        chromaShift = cu->getHorzChromaShift();
+        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cu->getVertChromaShift());
         offset     = stride;
         srcStep    = 1;
-        tmpSrcCb  += edge * stride << log2UnitSizeV;
-        tmpSrcCr  += edge * stride << log2UnitSizeV;
-        loopLength = sizeChromaH;
     }
 
-    for (uint32_t idx = 0; idx < numParts; idx++)
+    pixel* srcChroma[2];
+    srcChroma[0] = reconYuv->getCbAddr() + srcOffset;
+    srcChroma[1] = reconYuv->getCrAddr() + srcOffset;
+
+    uint32_t numUnits = cu->m_pic->getNumPartInCUSize() >> (depth + chromaShift);
+
+    for (uint32_t idx = 0; idx < numUnits; idx++)
     {
-        bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
-        bs = blockingStrength[bsAbsIdx];
+        uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
+        uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx << chromaShift);
+        uint32_t bs = blockingStrength[bsAbsIdx];
 
         if (bs > 1)
         {
@@ -630,7 +612,6 @@
             for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
             {
                 int32_t chromaQPOffset  = !chromaIdx ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset;
-                pixel* tmpSrcChroma = !chromaIdx ? tmpSrcCb : tmpSrcCr;
                 int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
                 if (qp >= 30)
                 {
@@ -640,12 +621,12 @@
                         qp = X265_MIN(qp, 51);
                 }
 
-                int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
-                int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset);
-                int32_t tc = s_tcTable[indexTC] * bitdepthScale;
+                int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
+                const int32_t bitdepthShift = X265_DEPTH - 8;
+                int32_t tc = s_tcTable[indexTC] << bitdepthShift;
+                pixel* srcC = srcChroma[chromaIdx];
 
-                for (uint32_t step = 0; step < loopLength; step++)
-                    pelFilterChroma(tmpSrcChroma + srcStep * (step + idx * loopLength), offset, tc, partPNoFilter, partQNoFilter);
+                pelFilterChroma(srcC + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter);
             }
         }
     }
diff -r 7dccbbed0349 -r 06237deb460b source/common/deblock.h
--- a/source/common/deblock.h	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/common/deblock.h	Fri Sep 26 19:34:36 2014 +0900
@@ -43,31 +43,31 @@
 
     void init() { m_numPartitions = 1 << (g_maxFullDepth * 2); }
 
-    void deblockCTU(TComDataCU* cu, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
+    void deblockCTU(TComDataCU* cu, int32_t dir);
 
 protected:
 
     // CU-level deblocking function
-    void deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, const int32_t Edge, bool edgeFilter[], uint8_t blockingStrength[]);
+    void deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, const int32_t Edge, uint8_t blockingStrength[]);
 
     struct Param
     {
-        bool leftEdge;
-        bool topEdge;
+        uint8_t leftEdge;
+        uint8_t topEdge;
     };
 
     // set filtering functions
     void setLoopfilterParam(TComDataCU* cu, uint32_t absZOrderIdx, Param *params);
-    void setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
-    void setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[]);
-    void setEdgefilterMultiple(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edgeIdx, bool value, bool edgeFilter[], uint8_t blockingStrength[], uint32_t widthInBaseUnits = 0);
+    void setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, uint8_t blockingStrength[]);
+    void setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, uint8_t blockingStrength[], uint32_t widthInBaseUnits);
+    void setEdgefilterMultiple(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockingStrength[], uint32_t widthInBaseUnits);
 
     // get filtering functions
     void getBoundaryStrengthSingle(TComDataCU* cu, int32_t dir, uint32_t partIdx, uint8_t blockingStrength[]);
 
     // filter luma/chroma functions
-    void edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[]);
-    void edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[]);
+    void edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[]);
+    void edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[]);
 
     static const uint8_t s_tcTable[54];
     static const uint8_t s_betaTable[52];
diff -r 7dccbbed0349 -r 06237deb460b source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/encoder/frameencoder.cpp	Fri Sep 26 19:34:36 2014 +0900
@@ -584,7 +584,7 @@
         processRowEncoder(realRow, tld);
     else
     {
-        processRowFilter(realRow, tld);
+        processRowFilter(realRow);
 
         // NOTE: Active next row
         if (realRow != m_numRows - 1)
diff -r 7dccbbed0349 -r 06237deb460b source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/encoder/frameencoder.h	Fri Sep 26 19:34:36 2014 +0900
@@ -49,10 +49,6 @@
 {
     Analysis analysis;
 
-    // NOTE: the maximum LCU 64x64 have 256 4x4 partitions
-    bool     edgeFilter[256];
-    uint8_t  blockingStrength[256];
-
     ~ThreadLocalData() { analysis.destroy(); }
 };
 
@@ -185,7 +181,7 @@
     /* Called by WaveFront::findJob() */
     void processRow(int row, int threadId);
     void processRowEncoder(int row, ThreadLocalData& tld);
-    void processRowFilter(int row, ThreadLocalData& tld) { m_frameFilter.processRow(row, tld); }
+    void processRowFilter(int row) { m_frameFilter.processRow(row); }
 
     void enqueueRowEncoder(int row) { WaveFront::enqueueRow(row * 2 + 0); }
     void enqueueRowFilter(int row)  { WaveFront::enqueueRow(row * 2 + 1); }
diff -r 7dccbbed0349 -r 06237deb460b source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/encoder/framefilter.cpp	Fri Sep 26 19:34:36 2014 +0900
@@ -78,7 +78,7 @@
         m_sao.startSlice(pic, initState, qp);
 }
 
-void FrameFilter::processRow(int row, ThreadLocalData& tld)
+void FrameFilter::processRow(int row)
 {
     PPAScopeEvent(Thread_filterCU);
 
@@ -98,17 +98,17 @@
             const uint32_t cuAddr = lineStartCUAddr + col;
             TComDataCU* cu = m_frame->getCU(cuAddr);
 
-            m_deblock.deblockCTU(cu, Deblock::EDGE_VER, tld.edgeFilter, tld.blockingStrength);
+            m_deblock.deblockCTU(cu, Deblock::EDGE_VER);
 
             if (col > 0)
             {
                 TComDataCU* cu_prev = m_frame->getCU(cuAddr - 1);
-                m_deblock.deblockCTU(cu_prev, Deblock::EDGE_HOR, tld.edgeFilter, tld.blockingStrength);
+                m_deblock.deblockCTU(cu_prev, Deblock::EDGE_HOR);
             }
         }
 
         TComDataCU* cu_prev = m_frame->getCU(lineStartCUAddr + numCols - 1);
-        m_deblock.deblockCTU(cu_prev, Deblock::EDGE_HOR, tld.edgeFilter, tld.blockingStrength);
+        m_deblock.deblockCTU(cu_prev, Deblock::EDGE_HOR);
     }
 
     // SAO
diff -r 7dccbbed0349 -r 06237deb460b source/encoder/framefilter.h
--- a/source/encoder/framefilter.h	Wed Sep 24 18:26:45 2014 -0500
+++ b/source/encoder/framefilter.h	Fri Sep 26 19:34:36 2014 +0900
@@ -64,7 +64,7 @@
 
     void start(Frame *pic, Entropy& initState, int qp);
 
-    void processRow(int row, ThreadLocalData& tld);
+    void processRow(int row);
     void processRowPost(int row);
     void processSao(int row);
 };



More information about the x265-devel mailing list