[x265] [PATCH] shortyuv: integrated asm primitives for blockcopy
Murugan Vairavel <murugan at multicorewareinc.com>
Fri Mar 7 12:45:19 CET 2014
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1394192688 -19800
# Fri Mar 07 17:14:48 2014 +0530
# Node ID 5c626bf1e275596b45808c14952bd5aea8aaeb3e
# Parent 2bf727dca27d6f69e96d4412850661cbe036cbef
shortyuv: integrated asm primitives for blockcopy
diff -r 2bf727dca27d -r 5c626bf1e275 source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp Fri Mar 07 15:11:13 2014 +0530
+++ b/source/common/shortyuv.cpp Fri Mar 07 17:14:48 2014 +0530
@@ -129,32 +129,27 @@
void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
{
+ int part = partitionFromSizes(width, height);
int16_t* src = getLumaAddr(partIdx);
int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
uint32_t srcStride = m_width;
uint32_t dstStride = dstPicYuv->m_width;
-#if HIGH_BIT_DEPTH
- primitives.blockcpy_pp(width, height, (pixel*)dst, dstStride, (pixel*)src, srcStride);
-#else
- for (uint32_t y = height; y != 0; y--)
- {
- ::memcpy(dst, src, width * sizeof(int16_t));
- src += srcStride;
- dst += dstStride;
- }
-#endif
+
+ primitives.luma_copy_ss[part](dst, dstStride, src, srcStride);
+
}
void ShortYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
{
+ int part = partitionFromSizes(width, height);
int16_t* src = getLumaAddr(partIdx);
pixel* dst = dstPicYuv->getLumaAddr(partIdx);
uint32_t srcStride = m_width;
uint32_t dstStride = dstPicYuv->getStride();
- primitives.blockcpy_ps(width, height, dst, dstStride, src, srcStride);
+ primitives.luma_copy_sp[part](dst, dstStride, src, srcStride);
}
void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
@@ -163,23 +158,15 @@
int16_t* srcV = getCrAddr(partIdx);
int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
+ width = width << m_hChromaShift;
+ height = height << m_vChromaShift;
+ int part = partitionFromSizes(width, height);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->m_cwidth;
-#if HIGH_BIT_DEPTH
- primitives.blockcpy_pp(width, height, (pixel*)dstU, dstStride, (pixel*)srcU, srcStride);
- primitives.blockcpy_pp(width, height, (pixel*)dstV, dstStride, (pixel*)srcV, srcStride);
-#else
- for (uint32_t y = height; y != 0; y--)
- {
- ::memcpy(dstU, srcU, width * sizeof(int16_t));
- ::memcpy(dstV, srcV, width * sizeof(int16_t));
- srcU += srcStride;
- srcV += srcStride;
- dstU += dstStride;
- dstV += dstStride;
- }
-#endif
+
+ primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
+ primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
}
void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
@@ -188,28 +175,29 @@
int16_t* srcV = getCrAddr(partIdx);
pixel* dstU = dstPicYuv->getCbAddr(partIdx);
pixel* dstV = dstPicYuv->getCrAddr(partIdx);
+ width = width << m_hChromaShift;
+ height = height << m_vChromaShift;
+ int part = partitionFromSizes(width, height);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->getCStride();
- primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, srcStride);
- primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
}
void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId)
{
+ width = width << m_hChromaShift;
+ height = height << m_vChromaShift;
+ int part = partitionFromSizes(width, height);
if (chromaId == 0)
{
int16_t* srcU = getCbAddr(partIdx);
int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->m_cwidth;
- for (uint32_t y = height; y != 0; y--)
- {
- ::memcpy(dstU, srcU, width * sizeof(int16_t));
- srcU += srcStride;
- dstU += dstStride;
- }
+ primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
}
else if (chromaId == 1)
{
@@ -217,12 +205,7 @@
int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->m_cwidth;
- for (uint32_t y = height; y != 0; y--)
- {
- ::memcpy(dstV, srcV, width * sizeof(int16_t));
- srcV += srcStride;
- dstV += dstStride;
- }
+ primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
}
else
{
@@ -232,27 +215,23 @@
int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->m_cwidth;
- for (uint32_t y = height; y != 0; y--)
- {
- ::memcpy(dstU, srcU, width * sizeof(int16_t));
- ::memcpy(dstV, srcV, width * sizeof(int16_t));
- srcU += srcStride;
- srcV += srcStride;
- dstU += dstStride;
- dstV += dstStride;
- }
+ primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
+ primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
}
}
void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId)
{
+ width = width << m_hChromaShift;
+ height = height << m_vChromaShift;
+ int part = partitionFromSizes(width, height);
if (chromaId == 0)
{
int16_t* srcU = getCbAddr(partIdx);
pixel* dstU = dstPicYuv->getCbAddr(partIdx);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->getCStride();
- primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
}
else if (chromaId == 1)
{
@@ -260,7 +239,7 @@
pixel* dstV = dstPicYuv->getCrAddr(partIdx);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->getCStride();
- primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
}
else
{
@@ -271,7 +250,7 @@
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->getCStride();
- primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, srcStride);
- primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
}
}
More information about the x265-devel mailing list