[x265] [PATCH] shortyuv: integrated asm primitives for blockcopy
Murugan Vairavel
murugan at multicorewareinc.com
Mon Mar 10 12:29:34 CET 2014
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1394450962 -19800
# Mon Mar 10 16:59:22 2014 +0530
# Node ID 4c8c208d66f2a159336fd9aa4522cb4aafe013d7
# Parent 50d7910ddd61632deeed969cf8e474561b175622
shortyuv: integrated asm primitives for blockcopy
diff -r 50d7910ddd61 -r 4c8c208d66f2 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Sun Mar 09 22:10:39 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Mar 10 16:59:22 2014 +0530
@@ -1110,8 +1110,7 @@
m_qtTempShortYuv[qtlayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << trSizeLog2, 1 << trSizeLog2);
if (!bLumaOnly && !bSkipChroma)
{
- uint32_t trSizeCLog2 = (bChromaSame ? trSizeLog2 : trSizeLog2 - 1);
- m_qtTempShortYuv[qtlayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2);
+ m_qtTempShortYuv[qtlayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << trSizeLog2, bChromaSame);
}
}
else
@@ -1166,8 +1165,7 @@
if (!bLumaOnly && !bSkipChroma)
{
- uint32_t trSizeCLog2 = (bChromaSame ? trSizeLog2 : trSizeLog2 - 1);
- m_qtTempShortYuv[qtlayer].copyPartToPartChroma(&m_qtTempTransformSkipYuv, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2);
+ m_qtTempShortYuv[qtlayer].copyPartToPartChroma(&m_qtTempTransformSkipYuv, absPartIdx, 1 << trSizeLog2, bChromaSame);
}
}
@@ -1280,8 +1278,8 @@
}
//===== copy reconstruction =====
- uint32_t trSizeCLog2 = (bChromaSame ? trSizeLog2 : trSizeLog2 - 1);
- m_qtTempShortYuv[qtlayer].copyPartToPartChroma(&m_qtTempTransformSkipYuv, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2, stateU0V1Both2);
+ uint32_t lumaSize = 1 << (bChromaSame ? trSizeLog2 + 1 : trSizeLog2);
+ m_qtTempShortYuv[qtlayer].copyPartToPartYuvChroma(&m_qtTempTransformSkipYuv, absPartIdx, lumaSize, stateU0V1Both2);
}
}
@@ -1538,8 +1536,7 @@
::memcpy(coeffDstV, coeffSrcV, sizeof(TCoeff) * numCoeffC);
//===== copy reconstruction =====
- uint32_t trSizeCLog2 = (bChromaSame || (chFmt == CHROMA_444)) ? trSizeLog2 : trSizeLog2 - 1;
- m_qtTempShortYuv[qtlayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2);
+ m_qtTempShortYuv[qtlayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << trSizeLog2, bChromaSame);
}
else
{
@@ -4155,6 +4152,7 @@
const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
bool bCodeChroma = true;
+ bool bChromaSame = false;
uint32_t trModeC = trMode;
if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
{
@@ -4162,6 +4160,7 @@
trModeC--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
bCodeChroma = ((absPartIdx % qpdiv) == 0);
+ bChromaSame = true;
}
if (bSpatial)
@@ -4172,7 +4171,7 @@
if (bCodeChroma)
{
- m_qtTempShortYuv[qtlayer].copyPartToPartChroma(resiYuv, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2);
+ m_qtTempShortYuv[qtlayer].copyPartToPartChroma(resiYuv, absPartIdx, 1 << trSizeLog2, bChromaSame);
}
}
else
diff -r 50d7910ddd61 -r 4c8c208d66f2 source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp Sun Mar 09 22:10:39 2014 -0500
+++ b/source/common/shortyuv.cpp Mon Mar 10 16:59:22 2014 +0530
@@ -115,50 +115,34 @@
primitives.pixeladd_ss(cpartSize, cpartSize, getCrAddr(trUnitIdx, cpartSize), m_cwidth, srcV0, srcV1, srcYuv0->m_cwidth, srcYuv1->m_cwidth);
}
-void ShortYuv::copyPartToPartYuv(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
-{
- copyPartToPartLuma(dstPicYuv, partIdx, width, height);
- copyPartToPartChroma(dstPicYuv, partIdx, width >> m_hChromaShift, height >> m_vChromaShift);
-}
-
-void ShortYuv::copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
-{
- copyPartToPartLuma(dstPicYuv, partIdx, width, height);
- copyPartToPartChroma(dstPicYuv, partIdx, width >> m_hChromaShift, height >> m_vChromaShift);
-}
-
void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
{
+ int part = partitionFromSizes(width, height);
int16_t* src = getLumaAddr(partIdx);
int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
uint32_t srcStride = m_width;
uint32_t dstStride = dstPicYuv->m_width;
-#if HIGH_BIT_DEPTH
- primitives.blockcpy_pp(width, height, (pixel*)dst, dstStride, (pixel*)src, srcStride);
-#else
- for (uint32_t y = height; y != 0; y--)
- {
- ::memcpy(dst, src, width * sizeof(int16_t));
- src += srcStride;
- dst += dstStride;
- }
-#endif
+
+ primitives.luma_copy_ss[part](dst, dstStride, src, srcStride);
+
}
void ShortYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
{
+ int part = partitionFromSizes(width, height);
int16_t* src = getLumaAddr(partIdx);
pixel* dst = dstPicYuv->getLumaAddr(partIdx);
uint32_t srcStride = m_width;
uint32_t dstStride = dstPicYuv->getStride();
- primitives.blockcpy_ps(width, height, dst, dstStride, src, srcStride);
+ primitives.luma_copy_sp[part](dst, dstStride, src, srcStride);
}
-void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
+void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame)
{
+ int part = partitionFromSizes(lumaSize, lumaSize);
int16_t* srcU = getCbAddr(partIdx);
int16_t* srcV = getCrAddr(partIdx);
int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
@@ -166,24 +150,22 @@
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->m_cwidth;
-#if HIGH_BIT_DEPTH
- primitives.blockcpy_pp(width, height, (pixel*)dstU, dstStride, (pixel*)srcU, srcStride);
- primitives.blockcpy_pp(width, height, (pixel*)dstV, dstStride, (pixel*)srcV, srcStride);
-#else
- for (uint32_t y = height; y != 0; y--)
+
+ if (bChromaSame)
{
- ::memcpy(dstU, srcU, width * sizeof(int16_t));
- ::memcpy(dstV, srcV, width * sizeof(int16_t));
- srcU += srcStride;
- srcV += srcStride;
- dstU += dstStride;
- dstV += dstStride;
+ primitives.luma_copy_ss[part](dstU, dstStride, srcU, srcStride);
+ primitives.luma_copy_ss[part](dstV, dstStride, srcV, srcStride);
}
-#endif
+ else
+ {
+ primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
+ primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
+ }
}
-void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
+void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame)
{
+ int part = partitionFromSizes(lumaSize, lumaSize);
int16_t* srcU = getCbAddr(partIdx);
int16_t* srcV = getCrAddr(partIdx);
pixel* dstU = dstPicYuv->getCbAddr(partIdx);
@@ -192,24 +174,28 @@
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->getCStride();
- primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, srcStride);
- primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, srcStride);
+ if (bChromaSame)
+ {
+ primitives.luma_copy_sp[part](dstU, dstStride, srcU, srcStride);
+ primitives.luma_copy_sp[part](dstV, dstStride, srcV, srcStride);
+ }
+ else
+ {
+ primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
+ }
}
-void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId)
+void ShortYuv::copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId)
{
+ int part = partitionFromSizes(lumaSize, lumaSize);
if (chromaId == 0)
{
int16_t* srcU = getCbAddr(partIdx);
int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->m_cwidth;
- for (uint32_t y = height; y != 0; y--)
- {
- ::memcpy(dstU, srcU, width * sizeof(int16_t));
- srcU += srcStride;
- dstU += dstStride;
- }
+ primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
}
else if (chromaId == 1)
{
@@ -217,12 +203,7 @@
int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->m_cwidth;
- for (uint32_t y = height; y != 0; y--)
- {
- ::memcpy(dstV, srcV, width * sizeof(int16_t));
- srcV += srcStride;
- dstV += dstStride;
- }
+ primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
}
else
{
@@ -232,27 +213,21 @@
int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->m_cwidth;
- for (uint32_t y = height; y != 0; y--)
- {
- ::memcpy(dstU, srcU, width * sizeof(int16_t));
- ::memcpy(dstV, srcV, width * sizeof(int16_t));
- srcU += srcStride;
- srcV += srcStride;
- dstU += dstStride;
- dstV += dstStride;
- }
+ primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
+ primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
}
}
-void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId)
+void ShortYuv::copyPartToPartYuvChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId)
{
+ int part = partitionFromSizes(lumaSize, lumaSize);
if (chromaId == 0)
{
int16_t* srcU = getCbAddr(partIdx);
pixel* dstU = dstPicYuv->getCbAddr(partIdx);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->getCStride();
- primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
}
else if (chromaId == 1)
{
@@ -260,7 +235,7 @@
pixel* dstV = dstPicYuv->getCrAddr(partIdx);
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->getCStride();
- primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
}
else
{
@@ -271,7 +246,7 @@
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->getCStride();
- primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, srcStride);
- primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
}
}
diff -r 50d7910ddd61 -r 4c8c208d66f2 source/common/shortyuv.h
--- a/source/common/shortyuv.h Sun Mar 09 22:10:39 2014 -0500
+++ b/source/common/shortyuv.h Mon Mar 10 16:59:22 2014 +0530
@@ -97,15 +97,13 @@
void subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t trUnitIdx, uint32_t partSize);
void addClip(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t trUnitIdx, uint32_t partSize);
- void copyPartToPartYuv(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
- void copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
- void copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId);
+ void copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame);
+ void copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId);
- void copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
- void copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
- void copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId);
+ void copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame);
+ void copyPartToPartYuvChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId);
};
}
More information about the x265-devel mailing list