[x265] refine deblocking filter

Satoshi Nakagawa nakagawa424 at oki.com
Thu Sep 25 05:31:01 CEST 2014


I can't reproduce it.

940cec3bf0b4 and 532d0266e333 (its parent) produce the same results.
There is no hash mismatch.

> -----Original Message-----
> From: x265-devel [mailto:x265-devel-bounces at videolan.org] On Behalf Of
> Steve Borho
> Sent: Thursday, September 25, 2014 10:26 AM
> To: Development for x265
> Subject: Re: [x265] refine deblocking filter
> 
> On 09/24, Satoshi Nakagawa wrote:
> > # HG changeset patch
> > # User Satoshi Nakagawa <nakagawa424 at oki.com> # Date 1411549726 -32400
> > #      Wed Sep 24 18:08:46 2014 +0900
> > # Node ID 9f96fc8374d834d424190b0b1581054996985b67
> > # Parent  b2b7072ddbf73085d457bd6a71bca946e505dea8
> > refine deblocking filter
> 
> Hi Satoshi,
> 
> I pushed this one since it looked harmless, but the automated tests are
> reporting decoder hash mismatches which bisect to this commit.  Can you
> take a look?
> 
> repro:
> vc11-x86_64-8bpp-Release
> x265 BasketballDrive_1920x1080_50.y4m o.bin --preset superfast -f 30
> --hash 1
> 
> > diff -r b2b7072ddbf7 -r 9f96fc8374d8
> source/Lib/TLibCommon/TComPicYuv.h
> > --- a/source/Lib/TLibCommon/TComPicYuv.h	Wed Sep 24 11:48:15 2014
> +0530
> > +++ b/source/Lib/TLibCommon/TComPicYuv.h	Wed Sep 24 18:08:46 2014
> +0900
> > @@ -155,6 +155,8 @@
> >
> >      pixel*  getChromaAddr(uint32_t chromaId, int cuAddr, int
> > absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] +
> > m_buOffsetC[absZOrderIdx]; }
> >
> > +    int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) {
> > + return m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
> > +
> >      uint32_t getCUHeight(int rowNum);
> >
> >      void  copyFromPicture(const x265_picture&, int padx, int pady);
> > diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.cpp
> > --- a/source/common/deblock.cpp	Wed Sep 24 11:48:15 2014 +0530
> > +++ b/source/common/deblock.cpp	Wed Sep 24 18:08:46 2014 +0900
> > @@ -48,7 +48,7 @@
> >          return;
> >
> >      Frame* pic = cu->m_pic;
> > -    uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1);
> > +    uint32_t curNumParts = m_numPartitions >> (depth * 2);
> >
> >      if (cu->getDepth(absZOrderIdx) > depth)
> >      {
> > @@ -56,35 +56,34 @@
> >          uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples
> - cu->getCUPelX();
> >          uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples
> - cu->getCUPelY();
> >          for (uint32_t partIdx = 0; partIdx < 4; partIdx++,
> absZOrderIdx += qNumParts)
> > -        {
> >              if (g_zscanToPelX[absZOrderIdx] < xmax &&
> g_zscanToPelY[absZOrderIdx] < ymax)
> >                  deblockCU(cu, absZOrderIdx, depth + 1, dir,
> edgeFilter, blockingStrength);
> > -        }
> >          return;
> >      }
> >
> >      Param params;
> >      setLoopfilterParam(cu, absZOrderIdx, &params);
> > -    setEdgefilterTU(cu, absZOrderIdx, absZOrderIdx, depth, dir,
> edgeFilter, blockingStrength);
> > +    setEdgefilterTU(cu, absZOrderIdx, depth, dir, edgeFilter,
> > + blockingStrength);
> >      setEdgefilterPU(cu, absZOrderIdx, dir, &params, edgeFilter,
> > blockingStrength);
> >
> >      for (uint32_t partIdx = absZOrderIdx; partIdx < absZOrderIdx +
> curNumParts; partIdx++)
> >      {
> > -        uint32_t bsCheck = (dir == EDGE_VER ? !(partIdx & 1) :
> !(partIdx
> & 2));
> > +        uint32_t bsCheck = !(partIdx & (1 << dir));
> >
> > -        if (edgeFilter[partIdx] && bsCheck)
> > +        if (bsCheck && edgeFilter[partIdx])
> >              getBoundaryStrengthSingle(cu, dir, partIdx,
> blockingStrength);
> >      }
> >
> > -    uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
> > +    const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >>
> > + LOG2_UNIT_SIZE;
> >      uint32_t sizeInPU = pic->getNumPartInCUSize() >> depth;
> >      uint32_t shiftFactor = (dir == EDGE_VER) ?
> cu->getHorzChromaShift() : cu->getVertChromaShift();
> > -    const bool alwaysDoChroma = cu->getChromaFormat() ==
> X265_CSP_I444;
> > -
> > +    uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor)
> >> LOG2_UNIT_SIZE) - 1;
> > +    uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absZOrderIdx] :
> > + g_zscanToPelY[absZOrderIdx]) >> LOG2_UNIT_SIZE;
> > +
> >      for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
> >      {
> >          edgeFilterLuma(cu, absZOrderIdx, depth, dir, e,
> blockingStrength);
> > -        if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK <<
> shiftFactor) >> LOG2_UNIT_SIZE)))
> > +        if (!((e0 + e) & chromaMask))
> >              edgeFilterChroma(cu, absZOrderIdx, depth, dir, e,
> blockingStrength);
> >      }
> >  }
> > @@ -115,66 +114,60 @@
> >      }
> >  }
> >
> > -void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx,
> > uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[],
> > uint8_t blockingStrength[])
> > +void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx,
> > +uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t
> > +blockingStrength[])
> >  {
> >      if (cu->getTransformIdx(absZOrderIdx) +
> cu->getDepth(absZOrderIdx) > (uint8_t)depth)
> >      {
> > -        const uint32_t curNumParts = cu->m_pic->getNumPartInCU() >>
> (depth << 1);
> > +        const uint32_t curNumParts = m_numPartitions >> (depth * 2);
> >          const uint32_t qNumParts   = curNumParts >> 2;
> >
> >          for (uint32_t partIdx = 0; partIdx < 4; partIdx++,
> absZOrderIdx += qNumParts)
> > -        {
> > -            uint32_t nsAddr = absZOrderIdx;
> > -            setEdgefilterTU(cu, nsAddr, absZOrderIdx, depth + 1, dir,
> edgeFilter, blockingStrength);
> > -        }
> > +            setEdgefilterTU(cu, absZOrderIdx, depth + 1, dir,
> > + edgeFilter, blockingStrength);
> >          return;
> >      }
> >
> >      uint32_t widthInBaseUnits  = 1 <<
> (cu->getLog2CUSize(absZOrderIdx) - cu->getTransformIdx(absZOrderIdx) -
> LOG2_UNIT_SIZE);
> > -    setEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true,
> edgeFilter, blockingStrength, widthInBaseUnits);
> > +    setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, true,
> > + edgeFilter, blockingStrength, widthInBaseUnits);
> >  }
> >
> >  void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t
> absZOrderIdx,
> > int32_t dir, Param *params, bool edgeFilter[], uint8_t
> blockingStrength[])  {
> >      const uint32_t depth = cu->getDepth(absZOrderIdx);
> >      const uint32_t widthInBaseUnits  =
> cu->m_pic->getNumPartInCUSize() >> depth;
> > -    const uint32_t hWidthInBaseUnits  = widthInBaseUnits  >> 1;
> > -    const uint32_t qWidthInBaseUnits  = widthInBaseUnits  >> 2;
> > +    const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
> > +    const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
> >
> >      setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, (dir ==
> > EDGE_VER ? params->leftEdge : params->topEdge), edgeFilter,
> > blockingStrength);
> >
> > -    int32_t mode = cu->getPartitionSize(absZOrderIdx);
> > -    switch (mode)
> > +    switch (cu->getPartitionSize(absZOrderIdx))
> >      {
> >      case SIZE_2NxN:
> > +        if (EDGE_HOR == dir)
> > +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir,
> hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> > +        break;
> >      case SIZE_Nx2N:
> > -        {
> > -            const int32_t realDir = (mode == SIZE_2NxN ? EDGE_HOR :
> EDGE_VER);
> > -            if (realDir == dir)
> > -                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir,
> hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> > -            break;
> > -        }
> > +        if (EDGE_VER == dir)
> > +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir,
> hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> > +        break;
> >      case SIZE_NxN:
> > -        {
> > -            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir,
> hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> > -            break;
> > -        }
> > +        setEdgefilterMultiple(cu, absZOrderIdx, depth, dir,
> hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> > +        break;
> >      case SIZE_2NxnU:
> > +        if (EDGE_HOR == dir)
> > +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir,
> qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> > +        break;
> >      case SIZE_nLx2N:
> > -        {
> > -            const int32_t realDir = (mode == SIZE_2NxnU ? EDGE_HOR :
> EDGE_VER);
> > -            if (realDir == dir)
> > -                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir,
> qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> > -            break;
> > -        }
> > +        if (EDGE_VER == dir)
> > +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir,
> qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> > +        break;
> >      case SIZE_2NxnD:
> > +        if (EDGE_HOR == dir)
> > +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir,
> widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter,
> blockingStrength);
> > +        break;
> >      case SIZE_nRx2N:
> > -        {
> > -            const int32_t realDir = (mode == SIZE_2NxnD ? EDGE_HOR :
> EDGE_VER);
> > -            if (realDir == dir)
> > -                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir,
> widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter,
> blockingStrength);
> > -            break;
> > -        }
> > +        if (EDGE_VER == dir)
> > +            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir,
> widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter,
> blockingStrength);
> > +        break;
> >
> >      case SIZE_2Nx2N:
> >      default:
> > @@ -338,15 +331,15 @@
> >      return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] +
> > src[offset * 2]);  }
> >
> > -static inline bool useStrongFiltering(int32_t offset, int32_t d,
> > int32_t beta, int32_t tc, pixel* src)
> > +static inline bool useStrongFiltering(int32_t offset, int32_t beta,
> > +int32_t tc, pixel* src)
> >  {
> > +    int16_t m0     = (int16_t)src[-offset * 4];
> > +    int16_t m3     = (int16_t)src[-offset];
> >      int16_t m4     = (int16_t)src[0];
> > -    int16_t m3     = (int16_t)src[-offset];
> >      int16_t m7     = (int16_t)src[offset * 3];
> > -    int16_t m0     = (int16_t)src[-offset * 4];
> >      int32_t strong = abs(m0 - m3) + abs(m7 - m4);
> >
> > -    return (strong < (beta >> 3)) && (d < (beta >> 2)) && (abs(m3 -
> m4) < ((tc * 5 + 1) >> 1));
> > +    return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1)
> >>
> > + 1));
> >  }
> >
> >  /* Deblocking for the luminance component with strong or weak filter
> > @@ -362,63 +355,61 @@  static inline void pelFilterLuma(pixel* src,
> > int32_t offset, int32_t tc, bool sw, bool partPNoFilter, bool
> partQNoFilter,
> >                                   int32_t thrCut, bool
> filterSecondP,
> > bool filterSecondQ)  {
> > -    int32_t delta;
> > -
> > +    int16_t m1  = (int16_t)src[-offset * 3];
> > +    int16_t m2  = (int16_t)src[-offset * 2];
> > +    int16_t m3  = (int16_t)src[-offset];
> >      int16_t m4  = (int16_t)src[0];
> > -    int16_t m3  = (int16_t)src[-offset];
> >      int16_t m5  = (int16_t)src[offset];
> > -    int16_t m2  = (int16_t)src[-offset * 2];
> >      int16_t m6  = (int16_t)src[offset * 2];
> > -    int16_t m1  = (int16_t)src[-offset * 3];
> > -    int16_t m7  = (int16_t)src[offset * 3];
> > -    int16_t m0  = (int16_t)src[-offset * 4];
> >
> >      if (sw)
> >      {
> > -        src[-offset]     = (pixel)Clip3(m3 - 2 * tc, m3 + 2 * tc, ((m1
> + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3));
> > -        src[0]           = (pixel)Clip3(m4 - 2 * tc, m4 + 2 * tc, ((m2
> + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3));
> > -        src[-offset * 2] = (pixel)Clip3(m2 - 2 * tc, m2 + 2 * tc, ((m1
> + m2 + m3 + m4 + 2) >> 2));
> > -        src[offset]      = (pixel)Clip3(m5 - 2 * tc, m5 + 2 * tc, ((m3
> + m4 + m5 + m6 + 2) >> 2));
> > -        src[-offset * 3] = (pixel)Clip3(m1 - 2 * tc, m1 + 2 * tc, ((2
> * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3));
> > -        src[offset * 2]  = (pixel)Clip3(m6 - 2 * tc, m6 + 2 * tc, ((m3
> + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3));
> > +        int16_t m0  = (int16_t)src[-offset * 4];
> > +        int16_t m7  = (int16_t)src[offset * 3];
> > +        int32_t tc2 = 2 * tc;
> > +        if (!partPNoFilter)
> > +        {
> > +            src[-offset * 3] = (pixel)(Clip3(-tc2, tc2, ((2 * m0 +
> 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1);
> > +            src[-offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m1 + m2 +
> m3 + m4 + 2) >> 2) - m2) + m2);
> > +            src[-offset]     = (pixel)(Clip3(-tc2, tc2, ((m1 + 2 *
> m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3);
> > +        }
> > +        if (!partQNoFilter)
> > +        {
> > +            src[0]           = (pixel)(Clip3(-tc2, tc2, ((m2 + 2 *
> m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4);
> > +            src[offset]      = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 +
> m5 + m6 + 2) >> 2) - m5) + m5);
> > +            src[offset * 2]  = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 +
> m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
> > +        }
> >      }
> >      else
> >      {
> >          /* Weak filter */
> > -        delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
> > +        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
> >
> >          if (abs(delta) < thrCut)
> >          {
> >              delta = Clip3(-tc, tc, delta);
> > -            src[-offset] = Clip(m3 + delta);
> > -            src[0] = Clip(m4 - delta);
> >
> >              int32_t tc2 = tc >> 1;
> > -            if (filterSecondP)
> > +            if (!partPNoFilter)
> >              {
> > -                int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1)
> >> 1) - m2 + delta) >> 1));
> > -                src[-offset * 2] = Clip(m2 + delta1);
> > +                src[-offset] = Clip(m3 + delta);
> > +                if (filterSecondP)
> > +                {
> > +                    int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 +
> 1) >> 1) - m2 + delta) >> 1));
> > +                    src[-offset * 2] = Clip(m2 + delta1);
> > +                }
> >              }
> > -            if (filterSecondQ)
> > +            if (!partQNoFilter)
> >              {
> > -                int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1)
> >> 1) - m5 - delta) >> 1));
> > -                src[offset] = Clip(m5 + delta2);
> > +                src[0] = Clip(m4 - delta);
> > +                if (filterSecondQ)
> > +                {
> > +                    int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 +
> 1) >> 1) - m5 - delta) >> 1));
> > +                    src[offset] = Clip(m5 + delta2);
> > +                }
> >              }
> >          }
> >      }
> > -
> > -    if (partPNoFilter)
> > -    {
> > -        src[-offset] = (pixel)m3;
> > -        src[-offset * 2] = (pixel)m2;
> > -        src[-offset * 3] = (pixel)m1;
> > -    }
> > -    if (partQNoFilter)
> > -    {
> > -        src[0] = (pixel)m4;
> > -        src[offset] = (pixel)m5;
> > -        src[offset * 2] = (pixel)m6;
> > -    }
> >  }
> >
> >  /* Deblocking of one line/column for the chrominance component @@
> > -429,34 +420,26 @@
> >   * \param partQNoFilter  indicator to disable filtering on partQ */
> > static inline void pelFilterChroma(pixel* src, int32_t offset, int32_t
> > tc, bool partPNoFilter, bool partQNoFilter)  {
> > -    int32_t delta;
> > +    int16_t m2  = (int16_t)src[-offset * 2];
> > +    int16_t m3  = (int16_t)src[-offset];
> > +    int16_t m4  = (int16_t)src[0];
> > +    int16_t m5  = (int16_t)src[offset];
> >
> > -    int16_t m4  = (int16_t)src[0];
> > -    int16_t m3  = (int16_t)src[-offset];
> > -    int16_t m5  = (int16_t)src[offset];
> > -    int16_t m2  = (int16_t)src[-offset * 2];
> > -
> > -    delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
> > -    src[-offset] = Clip(m3 + delta);
> > -    src[0] = Clip(m4 - delta);
> > -
> > -    if (partPNoFilter)
> > -        src[-offset] = (pixel)m3;
> > -    if (partQNoFilter)
> > -        src[0] = (pixel)m4;
> > +    int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4)
> >> 3));
> > +    if (!partPNoFilter)
> > +        src[-offset] = Clip(m3 + delta);
> > +    if (!partQNoFilter)
> > +        src[0] = Clip(m4 - delta);
> >  }
> >
> >  void Deblock::edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx,
> > uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
> {
> >      TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
> >      pixel* src = reconYuv->getLumaAddr(cu->getAddr(),
> absZOrderIdx);
> > -    pixel* tmpsrc = src;
> >
> >      int32_t stride = reconYuv->getStride();
> >      uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
> >
> > -    uint32_t blocksInPart = (LOG2_UNIT_SIZE - 2) > 0 ? 1 <<
> (LOG2_UNIT_SIZE - 2) : 1;
> > -    uint32_t bsAbsIdx = 0, bs = 0;
> >      int32_t offset, srcStep;
> >
> >      bool  partPNoFilter = false;
> > @@ -472,20 +455,20 @@
> >      {
> >          offset = 1;
> >          srcStep = stride;
> > -        tmpsrc += (edge << LOG2_UNIT_SIZE);
> > +        src += (edge << LOG2_UNIT_SIZE);
> >      }
> >      else // (dir == EDGE_HOR)
> >      {
> >          offset = stride;
> >          srcStep = 1;
> > -        tmpsrc += (edge << LOG2_UNIT_SIZE) * stride;
> > +        src += (edge << LOG2_UNIT_SIZE) * stride;
> >      }
> >
> >      for (uint32_t idx = 0; idx < numParts; idx++)
> >      {
> > -        uint32_t partOffset = idx << LOG2_UNIT_SIZE;
> > -        bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
> > -        bs = blockingStrength[bsAbsIdx];
> > +        uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
> > +        uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge,
> idx);
> > +        uint32_t bs = blockingStrength[bsAbsIdx];
> >          if (bs)
> >          {
> >              int32_t qpQ = cu->getQP(bsAbsIdx); @@ -499,29 +482,25 @@
> >
> >              int32_t qpP = cuP->getQP(partP);
> >              int32_t qp = (qpP + qpQ + 1) >> 1;
> > -            int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
> >
> > -            int32_t indexTC = Clip3(0, QP_MAX_SPEC +
> DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs -
> 1) + tcOffset));
> >              int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset);
> >
> > -            int32_t tc = s_tcTable[indexTC] * bitdepthScale;
> > -            int32_t beta = s_betaTable[indexB] * bitdepthScale;
> > -            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
> > -            int32_t thrCut = tc * 10;
> > +            const int32_t bitdepthShift = X265_DEPTH - 8;
> > +            int32_t beta = s_betaTable[indexB] << bitdepthShift;
> >
> > -            for (uint32_t blkIdx = 0; blkIdx < blocksInPart; blkIdx++)
> > +            int32_t dp0 = calcDP(src + srcStep * (unitOffset + 0),
> offset);
> > +            int32_t dq0 = calcDQ(src + srcStep * (unitOffset + 0),
> offset);
> > +            int32_t dp3 = calcDP(src + srcStep * (unitOffset + 3),
> offset);
> > +            int32_t dq3 = calcDQ(src + srcStep * (unitOffset + 3),
> offset);
> > +            int32_t d0 = dp0 + dq0;
> > +            int32_t d3 = dp3 + dq3;
> > +
> > +            int32_t dp = dp0 + dp3;
> > +            int32_t dq = dq0 + dq3;
> > +            int32_t d =  d0 + d3;
> > +
> > +            if (d < beta)
> >              {
> > -                int32_t dp0 = calcDP(tmpsrc + srcStep * (partOffset
> + blkIdx * 4 + 0), offset);
> > -                int32_t dq0 = calcDQ(tmpsrc + srcStep * (partOffset
> + blkIdx * 4 + 0), offset);
> > -                int32_t dp3 = calcDP(tmpsrc + srcStep * (partOffset
> + blkIdx * 4 + 3), offset);
> > -                int32_t dq3 = calcDQ(tmpsrc + srcStep * (partOffset
> + blkIdx * 4 + 3), offset);
> > -                int32_t d0 = dp0 + dq0;
> > -                int32_t d3 = dp3 + dq3;
> > -
> > -                int32_t dp = dp0 + dp3;
> > -                int32_t dq = dq0 + dq3;
> > -                int32_t d =  d0 + d3;
> > -
> >                  if (cu->m_slice->m_pps->bTransquantBypassEnabled)
> >                  {
> >                      // check if each of PUs is lossless coded @@
> > -529,17 +508,21 @@
> >                      partQNoFilter =
> cuQ->getCUTransquantBypass(partQ);
> >                  }
> >
> > -                if (d < beta)
> > -                {
> > -                    bool filterP = (dp < sideThreshold);
> > -                    bool filterQ = (dq < sideThreshold);
> > +                int32_t indexTC = Clip3(0, QP_MAX_SPEC +
> DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs -
> 1) + tcOffset));
> > +                int32_t tc = s_tcTable[indexTC] << bitdepthShift;
> > +                int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
> > +                int32_t thrCut = tc * 10;
> >
> > -                    bool sw = useStrongFiltering(offset, 2 * d0, beta,
> tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0))
> > -                           && useStrongFiltering(offset, 2 * d3,
> beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3));
> > +                bool filterP = (dp < sideThreshold);
> > +                bool filterQ = (dq < sideThreshold);
> >
> > -                    for (int32_t i = 0; i < DEBLOCK_SMALLEST_BLOCK
> / 2; i++)
> > -                        pelFilterLuma(tmpsrc + srcStep * (partOffset
> + blkIdx * 4 + i), offset, tc, sw, partPNoFilter, partQNoFilter, thrCut,
> filterP, filterQ);
> > -                }
> > +                bool sw = (2 * d0 < (beta >> 2) &&
> > +                           2 * d3 < (beta >> 2) &&
> > +                           useStrongFiltering(offset, beta, tc, src
> + srcStep * (unitOffset + 0)) &&
> > +                           useStrongFiltering(offset, beta, tc, src
> +
> > + srcStep * (unitOffset + 3)));
> > +
> > +                for (int32_t i = 0; i < UNIT_SIZE; i++)
> > +                    pelFilterLuma(src + srcStep * (unitOffset + i),
> > + offset, tc, sw, partPNoFilter, partQNoFilter, thrCut, filterP,
> > + filterQ);
> >              }
> >          }
> >      }
> > @@ -548,17 +531,7 @@
> >  void Deblock::edgeFilterChroma(TComDataCU* cu, uint32_t
> absZOrderIdx,
> > uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
> {
> >      int32_t chFmt = cu->getChromaFormat();
> > -    TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
> > -    int32_t stride = reconYuv->getCStride();
> > -    pixel* srcCb = reconYuv->getCbAddr(cu->getAddr(), absZOrderIdx);
> > -    pixel* srcCr = reconYuv->getCrAddr(cu->getAddr(), absZOrderIdx);
> > -    uint32_t log2UnitSizeH = LOG2_UNIT_SIZE -
> cu->getHorzChromaShift();
> > -    uint32_t log2UnitSizeV = LOG2_UNIT_SIZE -
> cu->getVertChromaShift();
> > -    uint32_t sizeChromaH = 1 << log2UnitSizeH;
> > -    uint32_t sizeChromaV = 1 << log2UnitSizeV;
> > -    int32_t offset, srcStep;
> > -
> > -    const uint32_t lcuWidthInBaseUnits =
> cu->m_pic->getNumPartInCUSize();
> > +    int32_t offset, srcStep, chromaShift;
> >
> >      bool partPNoFilter = false;
> >      bool partQNoFilter = false;
> > @@ -568,44 +541,42 @@
> >      TComDataCU* cuQ = cu;
> >      int32_t tcOffset =
> > cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
> >
> > -    // Vertical Position
> > -    uint32_t edgeNumInLCUVert = g_zscanToRaster[absZOrderIdx] %
> lcuWidthInBaseUnits + edge;
> > -    uint32_t edgeNumInLCUHor = g_zscanToRaster[absZOrderIdx] /
> lcuWidthInBaseUnits + edge;
> > +    X265_CHECK(((dir == EDGE_VER)
> > +                ? ((g_zscanToPelX[absZOrderIdx] + edge * UNIT_SIZE)
> >> cu->getHorzChromaShift())
> > +                : ((g_zscanToPelY[absZOrderIdx] + edge * UNIT_SIZE)
> >> cu->getVertChromaShift())) % DEBLOCK_SMALLEST_BLOCK == 0,
> > +               "invalid edge\n");
> >
> > -    if ((sizeChromaH < DEBLOCK_SMALLEST_BLOCK) && (sizeChromaV <
> DEBLOCK_SMALLEST_BLOCK) &&
> > -        (((edgeNumInLCUVert % (DEBLOCK_SMALLEST_BLOCK >>
> log2UnitSizeH)) && !dir) ||
> > -         ((edgeNumInLCUHor % (DEBLOCK_SMALLEST_BLOCK >>
> log2UnitSizeV)) && dir)))
> > -        return;
> >
> > -    uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
> > -    uint32_t bsAbsIdx;
> > -    uint8_t bs;
> > -
> > -    pixel* tmpSrcCb = srcCb;
> > -    pixel* tmpSrcCr = srcCr;
> > -    uint32_t loopLength;
> > +    TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
> > +    int32_t stride = reconYuv->getCStride();
> > +    int32_t srcOffset =
> reconYuv->getChromaAddrOffset(cu->getAddr(),
> > + absZOrderIdx);
> >
> >      if (dir == EDGE_VER)
> >      {
> > +        chromaShift = cu->getVertChromaShift();
> > +        srcOffset += (edge << (LOG2_UNIT_SIZE -
> > + cu->getHorzChromaShift()));
> >          offset     = 1;
> >          srcStep    = stride;
> > -        tmpSrcCb  += (edge << log2UnitSizeH);
> > -        tmpSrcCr  += (edge << log2UnitSizeH);
> > -        loopLength = sizeChromaV;
> >      }
> >      else // (dir == EDGE_HOR)
> >      {
> > +        chromaShift = cu->getHorzChromaShift();
> > +        srcOffset += edge * stride << (LOG2_UNIT_SIZE -
> > + cu->getVertChromaShift());
> >          offset     = stride;
> >          srcStep    = 1;
> > -        tmpSrcCb  += edge * stride << log2UnitSizeV;
> > -        tmpSrcCr  += edge * stride << log2UnitSizeV;
> > -        loopLength = sizeChromaH;
> >      }
> >
> > -    for (uint32_t idx = 0; idx < numParts; idx++)
> > +    pixel* srcChroma[2];
> > +    srcChroma[0] = reconYuv->getCbAddr() + srcOffset;
> > +    srcChroma[1] = reconYuv->getCrAddr() + srcOffset;
> > +
> > +    uint32_t numUnits = cu->m_pic->getNumPartInCUSize() >> (depth +
> > + chromaShift);
> > +
> > +    for (uint32_t idx = 0; idx < numUnits; idx++)
> >      {
> > -        bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
> > -        bs = blockingStrength[bsAbsIdx];
> > +        uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
> > +        uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge,
> idx << chromaShift);
> > +        uint32_t bs = blockingStrength[bsAbsIdx];
> >
> >          if (bs > 1)
> >          {
> > @@ -630,7 +601,6 @@
> >              for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
> >              {
> >                  int32_t chromaQPOffset  = !chromaIdx ?
> cu->m_slice->m_pps->chromaCbQpOffset :
> cu->m_slice->m_pps->chromaCrQpOffset;
> > -                pixel* tmpSrcChroma = !chromaIdx ? tmpSrcCb :
> tmpSrcCr;
> >                  int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
> >                  if (qp >= 30)
> >                  {
> > @@ -640,12 +610,13 @@
> >                          qp = X265_MIN(qp, 51);
> >                  }
> >
> > -                int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
> > -                int32_t indexTC = Clip3(0, QP_MAX_SPEC +
> DEFAULT_INTRA_TC_OFFSET, qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) +
> tcOffset);
> > -                int32_t tc = s_tcTable[indexTC] * bitdepthScale;
> > +                int32_t indexTC = Clip3(0, QP_MAX_SPEC +
> DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET +
> tcOffset));
> > +                const int32_t bitdepthShift = X265_DEPTH - 8;
> > +                int32_t tc = s_tcTable[indexTC] << bitdepthShift;
> > +                pixel* srcC = srcChroma[chromaIdx];
> >
> > -                for (uint32_t step = 0; step < loopLength; step++)
> > -                    pelFilterChroma(tmpSrcChroma + srcStep * (step
> + idx * loopLength), offset, tc, partPNoFilter, partQNoFilter);
> > +                for (int32_t i = 0; i < UNIT_SIZE; i++)
> > +                    pelFilterChroma(srcC + srcStep * (unitOffset +
> > + i), offset, tc, partPNoFilter, partQNoFilter);
> >              }
> >          }
> >      }
> > diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.h
> > --- a/source/common/deblock.h	Wed Sep 24 11:48:15 2014 +0530
> > +++ b/source/common/deblock.h	Wed Sep 24 18:08:46 2014 +0900
> > @@ -58,7 +58,7 @@
> >
> >      // set filtering functions
> >      void setLoopfilterParam(TComDataCU* cu, uint32_t absZOrderIdx,
> Param *params);
> > -    void setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx,
> uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[],
> uint8_t blockingStrength[]);
> > +    void setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx,
> > + uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t
> > + blockingStrength[]);
> >      void setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx,
> int32_t dir, Param *params, bool edgeFilter[], uint8_t
> blockingStrength[]);
> >      void setEdgefilterMultiple(TComDataCU* cu, uint32_t
> absZOrderIdx,
> > uint32_t depth, int32_t dir, int32_t edgeIdx, bool value, bool
> > edgeFilter[], uint8_t blockingStrength[], uint32_t widthInBaseUnits
> =
> > 0);
> >
> > _______________________________________________
> > x265-devel mailing list
> > x265-devel at videolan.org
> > https://mailman.videolan.org/listinfo/x265-devel
> 
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



More information about the x265-devel mailing list