[x265] [PATCH] shortyuv: integrated asm primitives for blockcopy

murugan at multicorewareinc.com murugan at multicorewareinc.com
Fri Mar 7 12:45:19 CET 2014


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1394192688 -19800
#      Fri Mar 07 17:14:48 2014 +0530
# Node ID 5c626bf1e275596b45808c14952bd5aea8aaeb3e
# Parent  2bf727dca27d6f69e96d4412850661cbe036cbef
shortyuv: integrated asm primitives for blockcopy

diff -r 2bf727dca27d -r 5c626bf1e275 source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp	Fri Mar 07 15:11:13 2014 +0530
+++ b/source/common/shortyuv.cpp	Fri Mar 07 17:14:48 2014 +0530
@@ -129,32 +129,27 @@
 
 void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
 {
+    int part = partitionFromSizes(width, height);
     int16_t* src = getLumaAddr(partIdx);
     int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
 
     uint32_t srcStride = m_width;
     uint32_t dstStride = dstPicYuv->m_width;
-#if HIGH_BIT_DEPTH
-    primitives.blockcpy_pp(width, height, (pixel*)dst, dstStride, (pixel*)src, srcStride);
-#else
-    for (uint32_t y = height; y != 0; y--)
-    {
-        ::memcpy(dst, src, width * sizeof(int16_t));
-        src += srcStride;
-        dst += dstStride;
-    }
-#endif
+
+    primitives.luma_copy_ss[part](dst, dstStride, src, srcStride);
+
 }
 
 void ShortYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
 {
+    int part = partitionFromSizes(width, height);
     int16_t* src = getLumaAddr(partIdx);
     pixel* dst = dstPicYuv->getLumaAddr(partIdx);
 
     uint32_t srcStride = m_width;
     uint32_t dstStride = dstPicYuv->getStride();
 
-    primitives.blockcpy_ps(width, height, dst, dstStride, src, srcStride);
+    primitives.luma_copy_sp[part](dst, dstStride, src, srcStride);
 }
 
 void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
@@ -163,23 +158,15 @@
     int16_t* srcV = getCrAddr(partIdx);
     int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
     int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
+    width  = width  << m_hChromaShift;
+    height = height << m_vChromaShift;
+    int part = partitionFromSizes(width, height);
 
     uint32_t srcStride = m_cwidth;
     uint32_t dstStride = dstPicYuv->m_cwidth;
-#if HIGH_BIT_DEPTH
-    primitives.blockcpy_pp(width, height, (pixel*)dstU, dstStride, (pixel*)srcU, srcStride);
-    primitives.blockcpy_pp(width, height, (pixel*)dstV, dstStride, (pixel*)srcV, srcStride);
-#else
-    for (uint32_t y = height; y != 0; y--)
-    {
-        ::memcpy(dstU, srcU, width * sizeof(int16_t));
-        ::memcpy(dstV, srcV, width * sizeof(int16_t));
-        srcU += srcStride;
-        srcV += srcStride;
-        dstU += dstStride;
-        dstV += dstStride;
-    }
-#endif
+
+    primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
+    primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
 }
 
 void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
@@ -188,28 +175,29 @@
     int16_t* srcV = getCrAddr(partIdx);
     pixel* dstU = dstPicYuv->getCbAddr(partIdx);
     pixel* dstV = dstPicYuv->getCrAddr(partIdx);
+    width  = width  << m_hChromaShift;
+    height = height << m_vChromaShift;
+    int part = partitionFromSizes(width, height);
 
     uint32_t srcStride = m_cwidth;
     uint32_t dstStride = dstPicYuv->getCStride();
 
-    primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, srcStride);
-    primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, srcStride);
+    primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
+    primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
 }
 
 void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId)
 {
+    width  = width  << m_hChromaShift;
+    height = height << m_vChromaShift;
+    int part = partitionFromSizes(width, height);
     if (chromaId == 0)
     {
         int16_t* srcU = getCbAddr(partIdx);
         int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
         uint32_t srcStride = m_cwidth;
         uint32_t dstStride = dstPicYuv->m_cwidth;
-        for (uint32_t y = height; y != 0; y--)
-        {
-            ::memcpy(dstU, srcU, width * sizeof(int16_t));
-            srcU += srcStride;
-            dstU += dstStride;
-        }
+        primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
     }
     else if (chromaId == 1)
     {
@@ -217,12 +205,7 @@
         int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
         uint32_t srcStride = m_cwidth;
         uint32_t dstStride = dstPicYuv->m_cwidth;
-        for (uint32_t y = height; y != 0; y--)
-        {
-            ::memcpy(dstV, srcV, width * sizeof(int16_t));
-            srcV += srcStride;
-            dstV += dstStride;
-        }
+        primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
     }
     else
     {
@@ -232,27 +215,23 @@
         int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
         uint32_t srcStride = m_cwidth;
         uint32_t dstStride = dstPicYuv->m_cwidth;
-        for (uint32_t y = height; y != 0; y--)
-        {
-            ::memcpy(dstU, srcU, width * sizeof(int16_t));
-            ::memcpy(dstV, srcV, width * sizeof(int16_t));
-            srcU += srcStride;
-            srcV += srcStride;
-            dstU += dstStride;
-            dstV += dstStride;
-        }
+        primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
+        primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
     }
 }
 
 void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId)
 {
+    width  = width  << m_hChromaShift;
+    height = height << m_vChromaShift;
+    int part = partitionFromSizes(width, height);
     if (chromaId == 0)
     {
         int16_t* srcU = getCbAddr(partIdx);
         pixel* dstU = dstPicYuv->getCbAddr(partIdx);
         uint32_t srcStride = m_cwidth;
         uint32_t dstStride = dstPicYuv->getCStride();
-        primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, srcStride);
+        primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
     }
     else if (chromaId == 1)
     {
@@ -260,7 +239,7 @@
         pixel* dstV = dstPicYuv->getCrAddr(partIdx);
         uint32_t srcStride = m_cwidth;
         uint32_t dstStride = dstPicYuv->getCStride();
-        primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, srcStride);
+        primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
     }
     else
     {
@@ -271,7 +250,7 @@
 
         uint32_t srcStride = m_cwidth;
         uint32_t dstStride = dstPicYuv->getCStride();
-        primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, srcStride);
-        primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, srcStride);
+        primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
+        primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
     }
 }


More information about the x265-devel mailing list