[x265] [PATCH 113 of 307] x86: Aligned routine encoder integration for addavg primitive
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:31:51 CEST 2018
# HG changeset patch
# User Jayashri Murugan
# Date 1507106012 -19800
# Wed Oct 04 14:03:32 2017 +0530
# Node ID ddc227597df3335e30cec9a50489f3fd87391274
# Parent 762682acf5c25bdecbfec2d0f4f32da7dea3a9e2
x86: Aligned routine encoder integration for addavg primitive
diff -r 762682acf5c2 -r ddc227597df3 source/common/yuv.cpp
--- a/source/common/yuv.cpp Wed Sep 27 17:08:32 2017 +0530
+++ b/source/common/yuv.cpp Wed Oct 04 14:03:32 2017 +0530
@@ -38,8 +38,9 @@
m_buf[2] = NULL;
}
-bool Yuv::create(uint32_t size, int csp)
+bool Yuv::create(uint32_t size, int csp, const int cpuid)
{
+ m_cpuid = cpuid;
m_csp = csp;
m_hChromaShift = CHROMA_H_SHIFT(csp);
m_vChromaShift = CHROMA_V_SHIFT(csp);
@@ -192,7 +193,10 @@
const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
pixel* dstY = getLumaAddr(absPartIdx);
- primitives.pu[part].addAvg(srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
+ if ((srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0) && (m_size % 64 == 0) && (m_cpuid & X265_CPU_AVX512))
+ primitives.pu[part].addAvg_aligned(srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
+ else
+ primitives.pu[part].addAvg(srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
}
if (bChroma)
{
@@ -202,8 +206,16 @@
const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
pixel* dstU = getCbAddr(absPartIdx);
pixel* dstV = getCrAddr(absPartIdx);
- primitives.chroma[m_csp].pu[part].addAvg(srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
- primitives.chroma[m_csp].pu[part].addAvg(srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+ if ((srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0) && (m_cpuid & X265_CPU_AVX512))
+ {
+ primitives.chroma[m_csp].pu[part].addAvg_aligned(srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+ primitives.chroma[m_csp].pu[part].addAvg_aligned(srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+ }
+ else
+ {
+ primitives.chroma[m_csp].pu[part].addAvg(srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+ primitives.chroma[m_csp].pu[part].addAvg(srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+ }
}
}
diff -r 762682acf5c2 -r ddc227597df3 source/common/yuv.h
--- a/source/common/yuv.h Wed Sep 27 17:08:32 2017 +0530
+++ b/source/common/yuv.h Wed Oct 04 14:03:32 2017 +0530
@@ -38,7 +38,7 @@
class Yuv
{
public:
-
+ int m_cpuid;
pixel* m_buf[3];
uint32_t m_size;
@@ -52,7 +52,7 @@
Yuv();
- bool create(uint32_t size, int csp);
+ bool create(uint32_t size, int csp, const int cpuid);
void destroy();
// Copy YUV buffer to picture buffer
diff -r 762682acf5c2 -r ddc227597df3 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Wed Sep 27 17:08:32 2017 +0530
+++ b/source/encoder/analysis.cpp Wed Oct 04 14:03:32 2017 +0530
@@ -101,14 +101,14 @@
{
ModeDepth &md = m_modeDepth[depth];
ok &= md.cuMemPool.create(depth, csp, MAX_PRED_TYPES, *m_param);
- ok &= md.fencYuv.create(cuSize, csp);
+ ok &= md.fencYuv.create(cuSize, csp, m_param->cpuid);
if (ok)
{
for (int j = 0; j < MAX_PRED_TYPES; j++)
{
md.pred[j].cu.initialize(md.cuMemPool, depth, *m_param, j);
- ok &= md.pred[j].predYuv.create(cuSize, csp);
- ok &= md.pred[j].reconYuv.create(cuSize, csp);
+ ok &= md.pred[j].predYuv.create(cuSize, csp, m_param->cpuid);
+ ok &= md.pred[j].reconYuv.create(cuSize, csp, m_param->cpuid);
md.pred[j].fencYuv = &md.fencYuv;
}
}
diff -r 762682acf5c2 -r ddc227597df3 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Wed Sep 27 17:08:32 2017 +0530
+++ b/source/encoder/motion.cpp Wed Oct 04 14:03:32 2017 +0530
@@ -115,7 +115,7 @@
void MotionEstimate::init(int csp)
{
- fencPUYuv.create(FENC_STRIDE, csp);
+ fencPUYuv.create(FENC_STRIDE, csp, 0); // m_cpuid is only assigned inside create(); pass 0 (no CPU flags, unaligned addAvg path) instead of reading the still-unset member
}
void MotionEstimate::initScales(void)
diff -r 762682acf5c2 -r ddc227597df3 source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed Sep 27 17:08:32 2017 +0530
+++ b/source/encoder/search.cpp Wed Oct 04 14:03:32 2017 +0530
@@ -120,7 +120,7 @@
CHECKED_MALLOC(m_rqt[i].coeffRQT[0], coeff_t, sizeL + sizeC * 2);
m_rqt[i].coeffRQT[1] = m_rqt[i].coeffRQT[0] + sizeL;
m_rqt[i].coeffRQT[2] = m_rqt[i].coeffRQT[0] + sizeL + sizeC;
- ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize, param.internalCsp);
+ ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize, param.internalCsp, param.cpuid);
ok &= m_rqt[i].resiQtYuv.create(param.maxCUSize, param.internalCsp);
}
}
@@ -130,7 +130,7 @@
{
CHECKED_MALLOC(m_rqt[i].coeffRQT[0], coeff_t, sizeL);
m_rqt[i].coeffRQT[1] = m_rqt[i].coeffRQT[2] = NULL;
- ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize, param.internalCsp);
+ ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize, param.internalCsp, param.cpuid);
ok &= m_rqt[i].resiQtYuv.create(param.maxCUSize, param.internalCsp);
}
}
@@ -140,9 +140,9 @@
{
int cuSize = param.maxCUSize >> i;
ok &= m_rqt[i].tmpResiYuv.create(cuSize, param.internalCsp);
- ok &= m_rqt[i].tmpPredYuv.create(cuSize, param.internalCsp);
- ok &= m_rqt[i].bidirPredYuv[0].create(cuSize, param.internalCsp);
- ok &= m_rqt[i].bidirPredYuv[1].create(cuSize, param.internalCsp);
+ ok &= m_rqt[i].tmpPredYuv.create(cuSize, param.internalCsp, param.cpuid);
+ ok &= m_rqt[i].bidirPredYuv[0].create(cuSize, param.internalCsp, param.cpuid);
+ ok &= m_rqt[i].bidirPredYuv[1].create(cuSize, param.internalCsp, param.cpuid);
}
if (param.internalCsp != X265_CSP_I400)
More information about the x265-devel
mailing list