[x265] [PATCH 113 of 307] x86: Aligned routine encoder integration for addavg primitive

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:31:51 CEST 2018


# HG changeset patch
# User Jayashri Murugan
# Date 1507106012 -19800
#      Wed Oct 04 14:03:32 2017 +0530
# Node ID ddc227597df3335e30cec9a50489f3fd87391274
# Parent  762682acf5c25bdecbfec2d0f4f32da7dea3a9e2
x86: Aligned routine encoder integration for addavg primitive

diff -r 762682acf5c2 -r ddc227597df3 source/common/yuv.cpp
--- a/source/common/yuv.cpp	Wed Sep 27 17:08:32 2017 +0530
+++ b/source/common/yuv.cpp	Wed Oct 04 14:03:32 2017 +0530
@@ -38,8 +38,9 @@
     m_buf[2] = NULL;
 }
 
-bool Yuv::create(uint32_t size, int csp)
+bool Yuv::create(uint32_t size, int csp, const int cpuid)
 {
+    m_cpuid = cpuid;
     m_csp = csp;
     m_hChromaShift = CHROMA_H_SHIFT(csp);
     m_vChromaShift = CHROMA_V_SHIFT(csp);
@@ -192,7 +193,10 @@
         const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
         const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
         pixel* dstY = getLumaAddr(absPartIdx);
-        primitives.pu[part].addAvg(srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
+        if ((srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0) && (m_size % 64 == 0) && (m_cpuid & X265_CPU_AVX512))
+            primitives.pu[part].addAvg_aligned(srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
+        else
+            primitives.pu[part].addAvg(srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
     }
     if (bChroma)
     {
@@ -202,8 +206,16 @@
         const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
         pixel* dstU = getCbAddr(absPartIdx);
         pixel* dstV = getCrAddr(absPartIdx);
-        primitives.chroma[m_csp].pu[part].addAvg(srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
-        primitives.chroma[m_csp].pu[part].addAvg(srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+        if ((srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0) && (m_cpuid & X265_CPU_AVX512))
+        {
+            primitives.chroma[m_csp].pu[part].addAvg_aligned(srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+            primitives.chroma[m_csp].pu[part].addAvg_aligned(srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+        }
+        else
+        {
+            primitives.chroma[m_csp].pu[part].addAvg(srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+            primitives.chroma[m_csp].pu[part].addAvg(srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+        }
     }
 }
 
diff -r 762682acf5c2 -r ddc227597df3 source/common/yuv.h
--- a/source/common/yuv.h	Wed Sep 27 17:08:32 2017 +0530
+++ b/source/common/yuv.h	Wed Oct 04 14:03:32 2017 +0530
@@ -38,7 +38,7 @@
 class Yuv
 {
 public:
-
+    int      m_cpuid;
     pixel*   m_buf[3];
 
     uint32_t m_size;
@@ -52,7 +52,7 @@
 
     Yuv();
 
-    bool   create(uint32_t size, int csp);
+    bool   create(uint32_t size, int csp, const int cpuid);
     void   destroy();
 
     // Copy YUV buffer to picture buffer
diff -r 762682acf5c2 -r ddc227597df3 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Wed Sep 27 17:08:32 2017 +0530
+++ b/source/encoder/analysis.cpp	Wed Oct 04 14:03:32 2017 +0530
@@ -101,14 +101,14 @@
     {
         ModeDepth &md = m_modeDepth[depth];
         ok &= md.cuMemPool.create(depth, csp, MAX_PRED_TYPES, *m_param);
-        ok &= md.fencYuv.create(cuSize, csp);
+        ok &= md.fencYuv.create(cuSize, csp, m_param->cpuid);
         if (ok)
         {
             for (int j = 0; j < MAX_PRED_TYPES; j++)
             {
                 md.pred[j].cu.initialize(md.cuMemPool, depth, *m_param, j);
-                ok &= md.pred[j].predYuv.create(cuSize, csp);
-                ok &= md.pred[j].reconYuv.create(cuSize, csp);
+                ok &= md.pred[j].predYuv.create(cuSize, csp, m_param->cpuid);
+                ok &= md.pred[j].reconYuv.create(cuSize, csp, m_param->cpuid);
                 md.pred[j].fencYuv = &md.fencYuv;
             }
         }
diff -r 762682acf5c2 -r ddc227597df3 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Wed Sep 27 17:08:32 2017 +0530
+++ b/source/encoder/motion.cpp	Wed Oct 04 14:03:32 2017 +0530
@@ -115,7 +115,7 @@
 
 void MotionEstimate::init(int csp)
 {
-    fencPUYuv.create(FENC_STRIDE, csp);
+    fencPUYuv.create(FENC_STRIDE, csp, fencPUYuv.m_cpuid);
 }
 
 void MotionEstimate::initScales(void)
diff -r 762682acf5c2 -r ddc227597df3 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Wed Sep 27 17:08:32 2017 +0530
+++ b/source/encoder/search.cpp	Wed Oct 04 14:03:32 2017 +0530
@@ -120,7 +120,7 @@
             CHECKED_MALLOC(m_rqt[i].coeffRQT[0], coeff_t, sizeL + sizeC * 2);
             m_rqt[i].coeffRQT[1] = m_rqt[i].coeffRQT[0] + sizeL;
             m_rqt[i].coeffRQT[2] = m_rqt[i].coeffRQT[0] + sizeL + sizeC;
-            ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize, param.internalCsp);
+            ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize, param.internalCsp, param.cpuid);
             ok &= m_rqt[i].resiQtYuv.create(param.maxCUSize, param.internalCsp);
         }
     }
@@ -130,7 +130,7 @@
         {
             CHECKED_MALLOC(m_rqt[i].coeffRQT[0], coeff_t, sizeL);
             m_rqt[i].coeffRQT[1] = m_rqt[i].coeffRQT[2] = NULL;
-            ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize, param.internalCsp);
+            ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize, param.internalCsp, param.cpuid);
             ok &= m_rqt[i].resiQtYuv.create(param.maxCUSize, param.internalCsp);
         }
     }
@@ -140,9 +140,9 @@
     {
         int cuSize = param.maxCUSize >> i;
         ok &= m_rqt[i].tmpResiYuv.create(cuSize, param.internalCsp);
-        ok &= m_rqt[i].tmpPredYuv.create(cuSize, param.internalCsp);
-        ok &= m_rqt[i].bidirPredYuv[0].create(cuSize, param.internalCsp);
-        ok &= m_rqt[i].bidirPredYuv[1].create(cuSize, param.internalCsp);
+        ok &= m_rqt[i].tmpPredYuv.create(cuSize, param.internalCsp, param.cpuid);
+        ok &= m_rqt[i].bidirPredYuv[0].create(cuSize, param.internalCsp, param.cpuid);
+        ok &= m_rqt[i].bidirPredYuv[1].create(cuSize, param.internalCsp, param.cpuid);
     }
 
     if (param.internalCsp != X265_CSP_I400)


More information about the x265-devel mailing list