[x265] [PATCH] shortyuv: integrated asm primitives for blockcopy
Murugan Vairavel
murugan at multicorewareinc.com
Mon Mar 10 12:30:10 CET 2014
Sent a new patch.
On Sat, Mar 8, 2014 at 12:50 AM, Steve Borho <steve at borho.org> wrote:
> On Fri, Mar 7, 2014 at 5:45 AM, <murugan at multicorewareinc.com> wrote:
> > # HG changeset patch
> > # User Murugan Vairavel <murugan at multicorewareinc.com>
> > # Date 1394192688 -19800
> > # Fri Mar 07 17:14:48 2014 +0530
> > # Node ID 5c626bf1e275596b45808c14952bd5aea8aaeb3e
> > # Parent 2bf727dca27d6f69e96d4412850661cbe036cbef
> > shortyuv: integrated asm primitives for blockcopy
> >
> > diff -r 2bf727dca27d -r 5c626bf1e275 source/common/shortyuv.cpp
> > --- a/source/common/shortyuv.cpp Fri Mar 07 15:11:13 2014 +0530
> > +++ b/source/common/shortyuv.cpp Fri Mar 07 17:14:48 2014 +0530
> > @@ -129,32 +129,27 @@
> >
> > void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t
> partIdx, uint32_t width, uint32_t height)
> > {
> > + int part = partitionFromSizes(width, height);
> > int16_t* src = getLumaAddr(partIdx);
> > int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
> >
> > uint32_t srcStride = m_width;
> > uint32_t dstStride = dstPicYuv->m_width;
> > -#if HIGH_BIT_DEPTH
> > - primitives.blockcpy_pp(width, height, (pixel*)dst, dstStride,
> (pixel*)src, srcStride);
> > -#else
> > - for (uint32_t y = height; y != 0; y--)
> > - {
> > - ::memcpy(dst, src, width * sizeof(int16_t));
> > - src += srcStride;
> > - dst += dstStride;
> > - }
> > -#endif
> > +
> > + primitives.luma_copy_ss[part](dst, dstStride, src, srcStride);
> > +
> > }
> >
> > void ShortYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx,
> uint32_t width, uint32_t height)
> > {
> > + int part = partitionFromSizes(width, height);
> > int16_t* src = getLumaAddr(partIdx);
> > pixel* dst = dstPicYuv->getLumaAddr(partIdx);
> >
> > uint32_t srcStride = m_width;
> > uint32_t dstStride = dstPicYuv->getStride();
> >
> > - primitives.blockcpy_ps(width, height, dst, dstStride, src,
> srcStride);
> > + primitives.luma_copy_sp[part](dst, dstStride, src, srcStride);
> > }
> >
> > void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t
> partIdx, uint32_t width, uint32_t height)
> > @@ -163,23 +158,15 @@
> > int16_t* srcV = getCrAddr(partIdx);
> > int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> > int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> > + width = width << m_hChromaShift;
> > + height = height << m_vChromaShift;
> > + int part = partitionFromSizes(width, height);
> >
> > uint32_t srcStride = m_cwidth;
> > uint32_t dstStride = dstPicYuv->m_cwidth;
> > -#if HIGH_BIT_DEPTH
> > - primitives.blockcpy_pp(width, height, (pixel*)dstU, dstStride,
> (pixel*)srcU, srcStride);
> > - primitives.blockcpy_pp(width, height, (pixel*)dstV, dstStride,
> (pixel*)srcV, srcStride);
> > -#else
> > - for (uint32_t y = height; y != 0; y--)
> > - {
> > - ::memcpy(dstU, srcU, width * sizeof(int16_t));
> > - ::memcpy(dstV, srcV, width * sizeof(int16_t));
> > - srcU += srcStride;
> > - srcV += srcStride;
> > - dstU += dstStride;
> > - dstV += dstStride;
> > - }
> > -#endif
> > +
> > + primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU,
> srcStride);
> > + primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV,
> srcStride);
>
>
> I'm pretty sure this is wrong. The chroma primitives are indexed by
> color space and luma partition size, so the function pointer itself
> scales the luma dimensions down to the correct chroma dimensions for
> that color space.
>
> So essentially this function cannot use the chroma block copy
> functions without changing the function arguments and callers to pass
> in luma dimensions or the luma partition enum.
>
> > }
> >
> > void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t
> partIdx, uint32_t width, uint32_t height)
> > @@ -188,28 +175,29 @@
> > int16_t* srcV = getCrAddr(partIdx);
> > pixel* dstU = dstPicYuv->getCbAddr(partIdx);
> > pixel* dstV = dstPicYuv->getCrAddr(partIdx);
> > + width = width << m_hChromaShift;
> > + height = height << m_vChromaShift;
> > + int part = partitionFromSizes(width, height);
> >
> > uint32_t srcStride = m_cwidth;
> > uint32_t dstStride = dstPicYuv->getCStride();
> >
> > - primitives.blockcpy_ps(width, height, dstU, dstStride, srcU,
> srcStride);
> > - primitives.blockcpy_ps(width, height, dstV, dstStride, srcV,
> srcStride);
> > + primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU,
> srcStride);
> > + primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV,
> srcStride);
>
> Ditto -- same concern as for the previous function.
>
> > }
> >
> > void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t
> partIdx, uint32_t width, uint32_t height, uint32_t chromaId)
> > {
> > + width = width << m_hChromaShift;
> > + height = height << m_vChromaShift;
> > + int part = partitionFromSizes(width, height);
> > if (chromaId == 0)
> > {
> > int16_t* srcU = getCbAddr(partIdx);
> > int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> > uint32_t srcStride = m_cwidth;
> > uint32_t dstStride = dstPicYuv->m_cwidth;
> > - for (uint32_t y = height; y != 0; y--)
> > - {
> > - ::memcpy(dstU, srcU, width * sizeof(int16_t));
> > - srcU += srcStride;
> > - dstU += dstStride;
> > - }
> > + primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU,
> srcStride);
> > }
> > else if (chromaId == 1)
> > {
> > @@ -217,12 +205,7 @@
> > int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> > uint32_t srcStride = m_cwidth;
> > uint32_t dstStride = dstPicYuv->m_cwidth;
> > - for (uint32_t y = height; y != 0; y--)
> > - {
> > - ::memcpy(dstV, srcV, width * sizeof(int16_t));
> > - srcV += srcStride;
> > - dstV += dstStride;
> > - }
> > + primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV,
> srcStride);
> > }
> > else
> > {
> > @@ -232,27 +215,23 @@
> > int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> > uint32_t srcStride = m_cwidth;
> > uint32_t dstStride = dstPicYuv->m_cwidth;
> > - for (uint32_t y = height; y != 0; y--)
> > - {
> > - ::memcpy(dstU, srcU, width * sizeof(int16_t));
> > - ::memcpy(dstV, srcV, width * sizeof(int16_t));
> > - srcU += srcStride;
> > - srcV += srcStride;
> > - dstU += dstStride;
> > - dstV += dstStride;
> > - }
> > + primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU,
> srcStride);
> > + primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV,
> srcStride);
>
> And ditto here -- same concern again.
>
> > }
> > }
> >
> > void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t
> partIdx, uint32_t width, uint32_t height, uint32_t chromaId)
> > {
> > + width = width << m_hChromaShift;
> > + height = height << m_vChromaShift;
> > + int part = partitionFromSizes(width, height);
> > if (chromaId == 0)
> > {
> > int16_t* srcU = getCbAddr(partIdx);
> > pixel* dstU = dstPicYuv->getCbAddr(partIdx);
> > uint32_t srcStride = m_cwidth;
> > uint32_t dstStride = dstPicYuv->getCStride();
> > - primitives.blockcpy_ps(width, height, dstU, dstStride, srcU,
> srcStride);
> > + primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU,
> srcStride);
> > }
> > else if (chromaId == 1)
> > {
> > @@ -260,7 +239,7 @@
> > pixel* dstV = dstPicYuv->getCrAddr(partIdx);
> > uint32_t srcStride = m_cwidth;
> > uint32_t dstStride = dstPicYuv->getCStride();
> > - primitives.blockcpy_ps(width, height, dstV, dstStride, srcV,
> srcStride);
> > + primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV,
> srcStride);
> > }
> > else
> > {
> > @@ -271,7 +250,7 @@
> >
> > uint32_t srcStride = m_cwidth;
> > uint32_t dstStride = dstPicYuv->getCStride();
> > - primitives.blockcpy_ps(width, height, dstU, dstStride, srcU,
> srcStride);
> > - primitives.blockcpy_ps(width, height, dstV, dstStride, srcV,
> srcStride);
> > + primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU,
> srcStride);
> > + primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV,
> srcStride);
>
> This one is still broken in the same way.
>
> Have a look at how the TComYuv methods were modified to handle this.
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
With Regards,
Murugan. V
+919659287478
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140310/2a02ad4f/attachment-0001.html>
More information about the x265-devel mailing list