[x265] refine deblocking filter

Steve Borho steve at borho.org
Thu Sep 25 03:26:06 CEST 2014


On 09/24, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1411549726 -32400
> #      Wed Sep 24 18:08:46 2014 +0900
> # Node ID 9f96fc8374d834d424190b0b1581054996985b67
> # Parent  b2b7072ddbf73085d457bd6a71bca946e505dea8
> refine deblocking filter

Hi Satoshi,

I pushed this one since it looked harmless, but the automated tests are
reporting decoder hash mismatches which bisect to this commit.  Can you
take a look?

Repro (build: vc11-x86_64-8bpp-Release):
x265 BasketballDrive_1920x1080_50.y4m o.bin --preset superfast -f 30 --hash 1

> diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/Lib/TLibCommon/TComPicYuv.h
> --- a/source/Lib/TLibCommon/TComPicYuv.h	Wed Sep 24 11:48:15 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.h	Wed Sep 24 18:08:46 2014 +0900
> @@ -155,6 +155,8 @@
>  
>      pixel*  getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
>  
> +    int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
> +
>      uint32_t getCUHeight(int rowNum);
>  
>      void  copyFromPicture(const x265_picture&, int padx, int pady);
> diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.cpp
> --- a/source/common/deblock.cpp	Wed Sep 24 11:48:15 2014 +0530
> +++ b/source/common/deblock.cpp	Wed Sep 24 18:08:46 2014 +0900
> @@ -48,7 +48,7 @@
>          return;
>  
>      Frame* pic = cu->m_pic;
> -    uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1);
> +    uint32_t curNumParts = m_numPartitions >> (depth * 2);
>  
>      if (cu->getDepth(absZOrderIdx) > depth)
>      {
> @@ -56,35 +56,34 @@
>          uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples  - cu->getCUPelX();
>          uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->getCUPelY();
>          for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
> -        {
>              if (g_zscanToPelX[absZOrderIdx] < xmax && g_zscanToPelY[absZOrderIdx] < ymax)
>                  deblockCU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
> -        }
>          return;
>      }
>  
>      Param params;
>      setLoopfilterParam(cu, absZOrderIdx, &params);
> -    setEdgefilterTU(cu, absZOrderIdx, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
> +    setEdgefilterTU(cu, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
>      setEdgefilterPU(cu, absZOrderIdx, dir, &params, edgeFilter, blockingStrength);
>  
>      for (uint32_t partIdx = absZOrderIdx; partIdx < absZOrderIdx + curNumParts; partIdx++)
>      {
> -        uint32_t bsCheck = (dir == EDGE_VER ? !(partIdx & 1) : !(partIdx & 2));
> +        uint32_t bsCheck = !(partIdx & (1 << dir));
>  
> -        if (edgeFilter[partIdx] && bsCheck)
> +        if (bsCheck && edgeFilter[partIdx])
>              getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength);
>      }
>  
> -    uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
> +    const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
>      uint32_t sizeInPU = pic->getNumPartInCUSize() >> depth;
>      uint32_t shiftFactor = (dir == EDGE_VER) ? cu->getHorzChromaShift() : cu->getVertChromaShift();
> -    const bool alwaysDoChroma = cu->getChromaFormat() == X265_CSP_I444;
> -
> +    uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
> +    uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absZOrderIdx] : g_zscanToPelY[absZOrderIdx]) >> LOG2_UNIT_SIZE;
> +        
>      for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
>      {
>          edgeFilterLuma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
> -        if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE)))
> +        if (!((e0 + e) & chromaMask))
>              edgeFilterChroma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
>      }
>  }
> @@ -115,66 +114,60 @@
>      }
>  }
>  
> -void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
> +void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
>  {
>      if (cu->getTransformIdx(absZOrderIdx) + cu->getDepth(absZOrderIdx) > (uint8_t)depth)
>      {
> -        const uint32_t curNumParts = cu->m_pic->getNumPartInCU() >> (depth << 1);
> +        const uint32_t curNumParts = m_numPartitions >> (depth * 2);
>          const uint32_t qNumParts   = curNumParts >> 2;
>  
>          for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
> -        {
> -            uint32_t nsAddr = absZOrderIdx;
> -            setEdgefilterTU(cu, nsAddr, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
> -        }
> +            setEdgefilterTU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
>          return;
>      }
>  
>      uint32_t widthInBaseUnits  = 1 << (cu->getLog2CUSize(absZOrderIdx) - cu->getTransformIdx(absZOrderIdx) - LOG2_UNIT_SIZE);
> -    setEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
> +    setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
>  }
>  
>  void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[])
>  {
>      const uint32_t depth = cu->getDepth(absZOrderIdx);
>      const uint32_t widthInBaseUnits  = cu->m_pic->getNumPartInCUSize() >> depth;
> -    const uint32_t hWidthInBaseUnits  = widthInBaseUnits  >> 1;
> -    const uint32_t qWidthInBaseUnits  = widthInBaseUnits  >> 2;
> +    const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
> +    const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
>  
>      setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, (dir == EDGE_VER ? params->leftEdge : params->topEdge), edgeFilter, blockingStrength);
>  
> -    int32_t mode = cu->getPartitionSize(absZOrderIdx);
> -    switch (mode)
> +    switch (cu->getPartitionSize(absZOrderIdx))
>      {
>      case SIZE_2NxN:
> +        if (EDGE_HOR == dir)
> +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> +        break;
>      case SIZE_Nx2N:
> -        {
> -            const int32_t realDir = (mode == SIZE_2NxN ? EDGE_HOR : EDGE_VER);
> -            if (realDir == dir)
> -                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> -            break;
> -        }
> +        if (EDGE_VER == dir)
> +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> +        break;
>      case SIZE_NxN:
> -        {
> -            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> -            break;
> -        }
> +        setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> +        break;
>      case SIZE_2NxnU:
> +        if (EDGE_HOR == dir)
> +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> +        break;
>      case SIZE_nLx2N:
> -        {
> -            const int32_t realDir = (mode == SIZE_2NxnU ? EDGE_HOR : EDGE_VER);
> -            if (realDir == dir)
> -                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> -            break;
> -        }
> +        if (EDGE_VER == dir)
> +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> +        break;
>      case SIZE_2NxnD:
> +        if (EDGE_HOR == dir)
> +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> +        break;
>      case SIZE_nRx2N:
> -        {
> -            const int32_t realDir = (mode == SIZE_2NxnD ? EDGE_HOR : EDGE_VER);
> -            if (realDir == dir)
> -                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> -            break;
> -        }
> +        if (EDGE_VER == dir)
> +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> +        break;
>  
>      case SIZE_2Nx2N:
>      default:
> @@ -338,15 +331,15 @@
>      return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]);
>  }
>  
> -static inline bool useStrongFiltering(int32_t offset, int32_t d, int32_t beta, int32_t tc, pixel* src)
> +static inline bool useStrongFiltering(int32_t offset, int32_t beta, int32_t tc, pixel* src)
>  {
> +    int16_t m0     = (int16_t)src[-offset * 4];
> +    int16_t m3     = (int16_t)src[-offset];
>      int16_t m4     = (int16_t)src[0];
> -    int16_t m3     = (int16_t)src[-offset];
>      int16_t m7     = (int16_t)src[offset * 3];
> -    int16_t m0     = (int16_t)src[-offset * 4];
>      int32_t strong = abs(m0 - m3) + abs(m7 - m4);
>  
> -    return (strong < (beta >> 3)) && (d < (beta >> 2)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
> +    return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
>  }
>  
>  /* Deblocking for the luminance component with strong or weak filter
> @@ -362,63 +355,61 @@
>  static inline void pelFilterLuma(pixel* src, int32_t offset, int32_t tc, bool sw, bool partPNoFilter, bool partQNoFilter,
>                                   int32_t thrCut, bool filterSecondP, bool filterSecondQ)
>  {
> -    int32_t delta;
> -
> +    int16_t m1  = (int16_t)src[-offset * 3];
> +    int16_t m2  = (int16_t)src[-offset * 2];
> +    int16_t m3  = (int16_t)src[-offset];
>      int16_t m4  = (int16_t)src[0];
> -    int16_t m3  = (int16_t)src[-offset];
>      int16_t m5  = (int16_t)src[offset];
> -    int16_t m2  = (int16_t)src[-offset * 2];
>      int16_t m6  = (int16_t)src[offset * 2];
> -    int16_t m1  = (int16_t)src[-offset * 3];
> -    int16_t m7  = (int16_t)src[offset * 3];
> -    int16_t m0  = (int16_t)src[-offset * 4];
>  
>      if (sw)
>      {
> -        src[-offset]     = (pixel)Clip3(m3 - 2 * tc, m3 + 2 * tc, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3));
> -        src[0]           = (pixel)Clip3(m4 - 2 * tc, m4 + 2 * tc, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3));
> -        src[-offset * 2] = (pixel)Clip3(m2 - 2 * tc, m2 + 2 * tc, ((m1 + m2 + m3 + m4 + 2) >> 2));
> -        src[offset]      = (pixel)Clip3(m5 - 2 * tc, m5 + 2 * tc, ((m3 + m4 + m5 + m6 + 2) >> 2));
> -        src[-offset * 3] = (pixel)Clip3(m1 - 2 * tc, m1 + 2 * tc, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3));
> -        src[offset * 2]  = (pixel)Clip3(m6 - 2 * tc, m6 + 2 * tc, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3));
> +        int16_t m0  = (int16_t)src[-offset * 4];
> +        int16_t m7  = (int16_t)src[offset * 3];
> +        int32_t tc2 = 2 * tc;
> +        if (!partPNoFilter)
> +        {
> +            src[-offset * 3] = (pixel)(Clip3(-tc2, tc2, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1);
> +            src[-offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2);
> +            src[-offset]     = (pixel)(Clip3(-tc2, tc2, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3);
> +        }
> +        if (!partQNoFilter)
> +        {
> +            src[0]           = (pixel)(Clip3(-tc2, tc2, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4);
> +            src[offset]      = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5);
> +            src[offset * 2]  = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
> +        }
>      }
>      else
>      {
>          /* Weak filter */
> -        delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
> +        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
>  
>          if (abs(delta) < thrCut)
>          {
>              delta = Clip3(-tc, tc, delta);
> -            src[-offset] = Clip(m3 + delta);
> -            src[0] = Clip(m4 - delta);
>  
>              int32_t tc2 = tc >> 1;
> -            if (filterSecondP)
> +            if (!partPNoFilter)
>              {
> -                int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
> -                src[-offset * 2] = Clip(m2 + delta1);
> +                src[-offset] = Clip(m3 + delta);
> +                if (filterSecondP)
> +                {
> +                    int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
> +                    src[-offset * 2] = Clip(m2 + delta1);
> +                }
>              }
> -            if (filterSecondQ)
> +            if (!partQNoFilter)
>              {
> -                int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
> -                src[offset] = Clip(m5 + delta2);
> +                src[0] = Clip(m4 - delta);
> +                if (filterSecondQ)
> +                {
> +                    int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
> +                    src[offset] = Clip(m5 + delta2);
> +                }
>              }
>          }
>      }
> -
> -    if (partPNoFilter)
> -    {
> -        src[-offset] = (pixel)m3;
> -        src[-offset * 2] = (pixel)m2;
> -        src[-offset * 3] = (pixel)m1;
> -    }
> -    if (partQNoFilter)
> -    {
> -        src[0] = (pixel)m4;
> -        src[offset] = (pixel)m5;
> -        src[offset * 2] = (pixel)m6;
> -    }
>  }
>  
>  /* Deblocking of one line/column for the chrominance component
> @@ -429,34 +420,26 @@
>   * \param partQNoFilter  indicator to disable filtering on partQ */
>  static inline void pelFilterChroma(pixel* src, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
>  {
> -    int32_t delta;
> +    int16_t m2  = (int16_t)src[-offset * 2];
> +    int16_t m3  = (int16_t)src[-offset];
> +    int16_t m4  = (int16_t)src[0];
> +    int16_t m5  = (int16_t)src[offset];
>  
> -    int16_t m4  = (int16_t)src[0];
> -    int16_t m3  = (int16_t)src[-offset];
> -    int16_t m5  = (int16_t)src[offset];
> -    int16_t m2  = (int16_t)src[-offset * 2];
> -
> -    delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
> -    src[-offset] = Clip(m3 + delta);
> -    src[0] = Clip(m4 - delta);
> -
> -    if (partPNoFilter)
> -        src[-offset] = (pixel)m3;
> -    if (partQNoFilter)
> -        src[0] = (pixel)m4;
> +    int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
> +    if (!partPNoFilter)
> +        src[-offset] = Clip(m3 + delta);
> +    if (!partQNoFilter)
> +        src[0] = Clip(m4 - delta);
>  }
>  
>  void Deblock::edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
>  {
>      TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
>      pixel* src = reconYuv->getLumaAddr(cu->getAddr(), absZOrderIdx);
> -    pixel* tmpsrc = src;
>  
>      int32_t stride = reconYuv->getStride();
>      uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
>  
> -    uint32_t blocksInPart = (LOG2_UNIT_SIZE - 2) > 0 ? 1 << (LOG2_UNIT_SIZE - 2) : 1;
> -    uint32_t bsAbsIdx = 0, bs = 0;
>      int32_t offset, srcStep;
>  
>      bool  partPNoFilter = false;
> @@ -472,20 +455,20 @@
>      {
>          offset = 1;
>          srcStep = stride;
> -        tmpsrc += (edge << LOG2_UNIT_SIZE);
> +        src += (edge << LOG2_UNIT_SIZE);
>      }
>      else // (dir == EDGE_HOR)
>      {
>          offset = stride;
>          srcStep = 1;
> -        tmpsrc += (edge << LOG2_UNIT_SIZE) * stride;
> +        src += (edge << LOG2_UNIT_SIZE) * stride;
>      }
>  
>      for (uint32_t idx = 0; idx < numParts; idx++)
>      {
> -        uint32_t partOffset = idx << LOG2_UNIT_SIZE;
> -        bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
> -        bs = blockingStrength[bsAbsIdx];
> +        uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
> +        uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
> +        uint32_t bs = blockingStrength[bsAbsIdx];
>          if (bs)
>          {
>              int32_t qpQ = cu->getQP(bsAbsIdx);
> @@ -499,29 +482,25 @@
>  
>              int32_t qpP = cuP->getQP(partP);
>              int32_t qp = (qpP + qpQ + 1) >> 1;
> -            int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
>  
> -            int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
>              int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset);
>  
> -            int32_t tc = s_tcTable[indexTC] * bitdepthScale;
> -            int32_t beta = s_betaTable[indexB] * bitdepthScale;
> -            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
> -            int32_t thrCut = tc * 10;
> +            const int32_t bitdepthShift = X265_DEPTH - 8;
> +            int32_t beta = s_betaTable[indexB] << bitdepthShift;
>  
> -            for (uint32_t blkIdx = 0; blkIdx < blocksInPart; blkIdx++)
> +            int32_t dp0 = calcDP(src + srcStep * (unitOffset + 0), offset);
> +            int32_t dq0 = calcDQ(src + srcStep * (unitOffset + 0), offset);
> +            int32_t dp3 = calcDP(src + srcStep * (unitOffset + 3), offset);
> +            int32_t dq3 = calcDQ(src + srcStep * (unitOffset + 3), offset);
> +            int32_t d0 = dp0 + dq0;
> +            int32_t d3 = dp3 + dq3;
> +
> +            int32_t dp = dp0 + dp3;
> +            int32_t dq = dq0 + dq3;
> +            int32_t d =  d0 + d3;
> +
> +            if (d < beta)
>              {
> -                int32_t dp0 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
> -                int32_t dq0 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
> -                int32_t dp3 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
> -                int32_t dq3 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
> -                int32_t d0 = dp0 + dq0;
> -                int32_t d3 = dp3 + dq3;
> -
> -                int32_t dp = dp0 + dp3;
> -                int32_t dq = dq0 + dq3;
> -                int32_t d =  d0 + d3;
> -
>                  if (cu->m_slice->m_pps->bTransquantBypassEnabled)
>                  {
>                      // check if each of PUs is lossless coded
> @@ -529,17 +508,21 @@
>                      partQNoFilter = cuQ->getCUTransquantBypass(partQ);
>                  }
>  
> -                if (d < beta)
> -                {
> -                    bool filterP = (dp < sideThreshold);
> -                    bool filterQ = (dq < sideThreshold);
> +                int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
> +                int32_t tc = s_tcTable[indexTC] << bitdepthShift;
> +                int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
> +                int32_t thrCut = tc * 10;
>  
> -                    bool sw = useStrongFiltering(offset, 2 * d0, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0))
> -                           && useStrongFiltering(offset, 2 * d3, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3));
> +                bool filterP = (dp < sideThreshold);
> +                bool filterQ = (dq < sideThreshold);
>  
> -                    for (int32_t i = 0; i < DEBLOCK_SMALLEST_BLOCK / 2; i++)
> -                        pelFilterLuma(tmpsrc + srcStep * (partOffset + blkIdx * 4 + i), offset, tc, sw, partPNoFilter, partQNoFilter, thrCut, filterP, filterQ);
> -                }
> +                bool sw = (2 * d0 < (beta >> 2) &&
> +                           2 * d3 < (beta >> 2) &&
> +                           useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 0)) &&
> +                           useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 3)));
> +
> +                for (int32_t i = 0; i < UNIT_SIZE; i++)
> +                    pelFilterLuma(src + srcStep * (unitOffset + i), offset, tc, sw, partPNoFilter, partQNoFilter, thrCut, filterP, filterQ);
>              }
>          }
>      }
> @@ -548,17 +531,7 @@
>  void Deblock::edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
>  {
>      int32_t chFmt = cu->getChromaFormat();
> -    TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
> -    int32_t stride = reconYuv->getCStride();
> -    pixel* srcCb = reconYuv->getCbAddr(cu->getAddr(), absZOrderIdx);
> -    pixel* srcCr = reconYuv->getCrAddr(cu->getAddr(), absZOrderIdx);
> -    uint32_t log2UnitSizeH = LOG2_UNIT_SIZE - cu->getHorzChromaShift();
> -    uint32_t log2UnitSizeV = LOG2_UNIT_SIZE - cu->getVertChromaShift();
> -    uint32_t sizeChromaH = 1 << log2UnitSizeH;
> -    uint32_t sizeChromaV = 1 << log2UnitSizeV;
> -    int32_t offset, srcStep;
> -
> -    const uint32_t lcuWidthInBaseUnits = cu->m_pic->getNumPartInCUSize();
> +    int32_t offset, srcStep, chromaShift;
>  
>      bool partPNoFilter = false;
>      bool partQNoFilter = false;
> @@ -568,44 +541,42 @@
>      TComDataCU* cuQ = cu;
>      int32_t tcOffset = cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
>  
> -    // Vertical Position
> -    uint32_t edgeNumInLCUVert = g_zscanToRaster[absZOrderIdx] % lcuWidthInBaseUnits + edge;
> -    uint32_t edgeNumInLCUHor = g_zscanToRaster[absZOrderIdx] / lcuWidthInBaseUnits + edge;
> +    X265_CHECK(((dir == EDGE_VER)
> +                ? ((g_zscanToPelX[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getHorzChromaShift())
> +                : ((g_zscanToPelY[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getVertChromaShift())) % DEBLOCK_SMALLEST_BLOCK == 0,
> +               "invalid edge\n");
>  
> -    if ((sizeChromaH < DEBLOCK_SMALLEST_BLOCK) && (sizeChromaV < DEBLOCK_SMALLEST_BLOCK) &&
> -        (((edgeNumInLCUVert % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeH)) && !dir) ||
> -         ((edgeNumInLCUHor % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeV)) && dir)))
> -        return;
>  
> -    uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
> -    uint32_t bsAbsIdx;
> -    uint8_t bs;
> -
> -    pixel* tmpSrcCb = srcCb;
> -    pixel* tmpSrcCr = srcCr;
> -    uint32_t loopLength;
> +    TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
> +    int32_t stride = reconYuv->getCStride();
> +    int32_t srcOffset = reconYuv->getChromaAddrOffset(cu->getAddr(), absZOrderIdx);
>  
>      if (dir == EDGE_VER)
>      {
> +        chromaShift = cu->getVertChromaShift();
> +        srcOffset += (edge << (LOG2_UNIT_SIZE - cu->getHorzChromaShift()));
>          offset     = 1;
>          srcStep    = stride;
> -        tmpSrcCb  += (edge << log2UnitSizeH);
> -        tmpSrcCr  += (edge << log2UnitSizeH);
> -        loopLength = sizeChromaV;
>      }
>      else // (dir == EDGE_HOR)
>      {
> +        chromaShift = cu->getHorzChromaShift();
> +        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cu->getVertChromaShift());
>          offset     = stride;
>          srcStep    = 1;
> -        tmpSrcCb  += edge * stride << log2UnitSizeV;
> -        tmpSrcCr  += edge * stride << log2UnitSizeV;
> -        loopLength = sizeChromaH;
>      }
>  
> -    for (uint32_t idx = 0; idx < numParts; idx++)
> +    pixel* srcChroma[2];
> +    srcChroma[0] = reconYuv->getCbAddr() + srcOffset;
> +    srcChroma[1] = reconYuv->getCrAddr() + srcOffset;
> +
> +    uint32_t numUnits = cu->m_pic->getNumPartInCUSize() >> (depth + chromaShift);
> +
> +    for (uint32_t idx = 0; idx < numUnits; idx++)
>      {
> -        bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
> -        bs = blockingStrength[bsAbsIdx];
> +        uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
> +        uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx << chromaShift);
> +        uint32_t bs = blockingStrength[bsAbsIdx];
>  
>          if (bs > 1)
>          {
> @@ -630,7 +601,6 @@
>              for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
>              {
>                  int32_t chromaQPOffset  = !chromaIdx ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset;
> -                pixel* tmpSrcChroma = !chromaIdx ? tmpSrcCb : tmpSrcCr;
>                  int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
>                  if (qp >= 30)
>                  {
> @@ -640,12 +610,13 @@
>                          qp = X265_MIN(qp, 51);
>                  }
>  
> -                int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
> -                int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset);
> -                int32_t tc = s_tcTable[indexTC] * bitdepthScale;
> +                int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
> +                const int32_t bitdepthShift = X265_DEPTH - 8;
> +                int32_t tc = s_tcTable[indexTC] << bitdepthShift;
> +                pixel* srcC = srcChroma[chromaIdx];
>  
> -                for (uint32_t step = 0; step < loopLength; step++)
> -                    pelFilterChroma(tmpSrcChroma + srcStep * (step + idx * loopLength), offset, tc, partPNoFilter, partQNoFilter);
> +                for (int32_t i = 0; i < UNIT_SIZE; i++)
> +                    pelFilterChroma(srcC + srcStep * (unitOffset + i), offset, tc, partPNoFilter, partQNoFilter);
>              }
>          }
>      }
> diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.h
> --- a/source/common/deblock.h	Wed Sep 24 11:48:15 2014 +0530
> +++ b/source/common/deblock.h	Wed Sep 24 18:08:46 2014 +0900
> @@ -58,7 +58,7 @@
>  
>      // set filtering functions
>      void setLoopfilterParam(TComDataCU* cu, uint32_t absZOrderIdx, Param *params);
> -    void setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
> +    void setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
>      void setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[]);
>      void setEdgefilterMultiple(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edgeIdx, bool value, bool edgeFilter[], uint8_t blockingStrength[], uint32_t widthInBaseUnits = 0);
>  
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list