[x265] [PATCH] shortyuv: integrated asm primitives for blockcopy

Murugan Vairavel murugan at multicorewareinc.com
Mon Mar 10 12:30:10 CET 2014


I have sent a new patch.


On Sat, Mar 8, 2014 at 12:50 AM, Steve Borho <steve at borho.org> wrote:

> On Fri, Mar 7, 2014 at 5:45 AM,  <murugan at multicorewareinc.com> wrote:
> > # HG changeset patch
> > # User Murugan Vairavel <murugan at multicorewareinc.com>
> > # Date 1394192688 -19800
> > #      Fri Mar 07 17:14:48 2014 +0530
> > # Node ID 5c626bf1e275596b45808c14952bd5aea8aaeb3e
> > # Parent  2bf727dca27d6f69e96d4412850661cbe036cbef
> > shortyuv: integrated asm primitives for blockcopy
> >
> > diff -r 2bf727dca27d -r 5c626bf1e275 source/common/shortyuv.cpp
> > --- a/source/common/shortyuv.cpp        Fri Mar 07 15:11:13 2014 +0530
> > +++ b/source/common/shortyuv.cpp        Fri Mar 07 17:14:48 2014 +0530
> > @@ -129,32 +129,27 @@
> >
> >  void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t
> partIdx, uint32_t width, uint32_t height)
> >  {
> > +    int part = partitionFromSizes(width, height);
> >      int16_t* src = getLumaAddr(partIdx);
> >      int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
> >
> >      uint32_t srcStride = m_width;
> >      uint32_t dstStride = dstPicYuv->m_width;
> > -#if HIGH_BIT_DEPTH
> > -    primitives.blockcpy_pp(width, height, (pixel*)dst, dstStride,
> (pixel*)src, srcStride);
> > -#else
> > -    for (uint32_t y = height; y != 0; y--)
> > -    {
> > -        ::memcpy(dst, src, width * sizeof(int16_t));
> > -        src += srcStride;
> > -        dst += dstStride;
> > -    }
> > -#endif
> > +
> > +    primitives.luma_copy_ss[part](dst, dstStride, src, srcStride);
> > +
> >  }
> >
> >  void ShortYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx,
> uint32_t width, uint32_t height)
> >  {
> > +    int part = partitionFromSizes(width, height);
> >      int16_t* src = getLumaAddr(partIdx);
> >      pixel* dst = dstPicYuv->getLumaAddr(partIdx);
> >
> >      uint32_t srcStride = m_width;
> >      uint32_t dstStride = dstPicYuv->getStride();
> >
> > -    primitives.blockcpy_ps(width, height, dst, dstStride, src,
> srcStride);
> > +    primitives.luma_copy_sp[part](dst, dstStride, src, srcStride);
> >  }
> >
> >  void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t
> partIdx, uint32_t width, uint32_t height)
> > @@ -163,23 +158,15 @@
> >      int16_t* srcV = getCrAddr(partIdx);
> >      int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> >      int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> > +    width  = width  << m_hChromaShift;
> > +    height = height << m_vChromaShift;
> > +    int part = partitionFromSizes(width, height);
> >
> >      uint32_t srcStride = m_cwidth;
> >      uint32_t dstStride = dstPicYuv->m_cwidth;
> > -#if HIGH_BIT_DEPTH
> > -    primitives.blockcpy_pp(width, height, (pixel*)dstU, dstStride,
> (pixel*)srcU, srcStride);
> > -    primitives.blockcpy_pp(width, height, (pixel*)dstV, dstStride,
> (pixel*)srcV, srcStride);
> > -#else
> > -    for (uint32_t y = height; y != 0; y--)
> > -    {
> > -        ::memcpy(dstU, srcU, width * sizeof(int16_t));
> > -        ::memcpy(dstV, srcV, width * sizeof(int16_t));
> > -        srcU += srcStride;
> > -        srcV += srcStride;
> > -        dstU += dstStride;
> > -        dstV += dstStride;
> > -    }
> > -#endif
> > +
> > +    primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU,
> srcStride);
> > +    primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV,
> srcStride);
>
>
> I'm pretty sure this is wrong.  The chroma primitives are indexed by
> color space and luma partition size, so the function pointer itself
> scales the luma dimensions down to the correct chroma dimensions for
> that color space.
>
> So essentially this function cannot use the chroma block copy
> functions without changing the function arguments and callers to pass
> in luma dimensions or the luma partition enum.
>
> >  }
> >
> >  void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t
> partIdx, uint32_t width, uint32_t height)
> > @@ -188,28 +175,29 @@
> >      int16_t* srcV = getCrAddr(partIdx);
> >      pixel* dstU = dstPicYuv->getCbAddr(partIdx);
> >      pixel* dstV = dstPicYuv->getCrAddr(partIdx);
> > +    width  = width  << m_hChromaShift;
> > +    height = height << m_vChromaShift;
> > +    int part = partitionFromSizes(width, height);
> >
> >      uint32_t srcStride = m_cwidth;
> >      uint32_t dstStride = dstPicYuv->getCStride();
> >
> > -    primitives.blockcpy_ps(width, height, dstU, dstStride, srcU,
> srcStride);
> > -    primitives.blockcpy_ps(width, height, dstV, dstStride, srcV,
> srcStride);
> > +    primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU,
> srcStride);
> > +    primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV,
> srcStride);
>
> Ditto: the same partition-indexing problem applies to these copy_sp calls.
>
> >  }
> >
> >  void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t
> partIdx, uint32_t width, uint32_t height, uint32_t chromaId)
> >  {
> > +    width  = width  << m_hChromaShift;
> > +    height = height << m_vChromaShift;
> > +    int part = partitionFromSizes(width, height);
> >      if (chromaId == 0)
> >      {
> >          int16_t* srcU = getCbAddr(partIdx);
> >          int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> >          uint32_t srcStride = m_cwidth;
> >          uint32_t dstStride = dstPicYuv->m_cwidth;
> > -        for (uint32_t y = height; y != 0; y--)
> > -        {
> > -            ::memcpy(dstU, srcU, width * sizeof(int16_t));
> > -            srcU += srcStride;
> > -            dstU += dstStride;
> > -        }
> > +        primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU,
> srcStride);
> >      }
> >      else if (chromaId == 1)
> >      {
> > @@ -217,12 +205,7 @@
> >          int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> >          uint32_t srcStride = m_cwidth;
> >          uint32_t dstStride = dstPicYuv->m_cwidth;
> > -        for (uint32_t y = height; y != 0; y--)
> > -        {
> > -            ::memcpy(dstV, srcV, width * sizeof(int16_t));
> > -            srcV += srcStride;
> > -            dstV += dstStride;
> > -        }
> > +        primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV,
> srcStride);
> >      }
> >      else
> >      {
> > @@ -232,27 +215,23 @@
> >          int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> >          uint32_t srcStride = m_cwidth;
> >          uint32_t dstStride = dstPicYuv->m_cwidth;
> > -        for (uint32_t y = height; y != 0; y--)
> > -        {
> > -            ::memcpy(dstU, srcU, width * sizeof(int16_t));
> > -            ::memcpy(dstV, srcV, width * sizeof(int16_t));
> > -            srcU += srcStride;
> > -            srcV += srcStride;
> > -            dstU += dstStride;
> > -            dstV += dstStride;
> > -        }
> > +        primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU,
> srcStride);
> > +        primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV,
> srcStride);
>
> And ditto again: these copy_ss calls have the same partition-indexing problem.
>
> >      }
> >  }
> >
> >  void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t
> partIdx, uint32_t width, uint32_t height, uint32_t chromaId)
> >  {
> > +    width  = width  << m_hChromaShift;
> > +    height = height << m_vChromaShift;
> > +    int part = partitionFromSizes(width, height);
> >      if (chromaId == 0)
> >      {
> >          int16_t* srcU = getCbAddr(partIdx);
> >          pixel* dstU = dstPicYuv->getCbAddr(partIdx);
> >          uint32_t srcStride = m_cwidth;
> >          uint32_t dstStride = dstPicYuv->getCStride();
> > -        primitives.blockcpy_ps(width, height, dstU, dstStride, srcU,
> srcStride);
> > +        primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU,
> srcStride);
> >      }
> >      else if (chromaId == 1)
> >      {
> > @@ -260,7 +239,7 @@
> >          pixel* dstV = dstPicYuv->getCrAddr(partIdx);
> >          uint32_t srcStride = m_cwidth;
> >          uint32_t dstStride = dstPicYuv->getCStride();
> > -        primitives.blockcpy_ps(width, height, dstV, dstStride, srcV,
> srcStride);
> > +        primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV,
> srcStride);
> >      }
> >      else
> >      {
> > @@ -271,7 +250,7 @@
> >
> >          uint32_t srcStride = m_cwidth;
> >          uint32_t dstStride = dstPicYuv->getCStride();
> > -        primitives.blockcpy_ps(width, height, dstU, dstStride, srcU,
> srcStride);
> > -        primitives.blockcpy_ps(width, height, dstV, dstStride, srcV,
> srcStride);
> > +        primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU,
> srcStride);
> > +        primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV,
> srcStride);
>
> These copy_sp calls are still broken in the same way.
>
> Have a look at how the TComYuv methods were modified to handle this.
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>



-- 
With Regards,

Murugan. V
+919659287478
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140310/2a02ad4f/attachment-0001.html>


More information about the x265-devel mailing list