[x265] [PATCH 1/5] intrapred: implement IntraAngle mode for all sizes

Min Chen chenm003 at 163.com
Thu Jun 20 18:53:11 CEST 2013


From d546dcea73466e33468489c5754c66667539f981 Mon Sep 17 00:00:00 2001
From: Min Chen <chenm003 at 163.com>
Date: Thu, 20 Jun 2013 16:14:02 +0800
Subject: [PATCH 1/5] intrapred: implement IntraAngle mode for all sizes

---
 source/Lib/TLibEncoder/TEncSearch.cpp |    5 ++-
 source/common/IntraPred.cpp           |   22 ++++++++++-------
 source/common/primitives.h            |    2 +-
 source/common/vec/intrapred.inc       |   39 +++++++++++++++++-------------
 source/test/intrapredharness.cpp      |   41 +++++++++++++++++++-------------
 source/test/intrapredharness.h        |    2 +-
 6 files changed, 64 insertions(+), 47 deletions(-)

diff --git a/source/Lib/TLibEncoder/TEncSearch.cpp b/source/Lib/TLibEncoder/TEncSearch.cpp
index 7654de7..0e6ddc9 100644
--- a/source/Lib/TLibEncoder/TEncSearch.cpp
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp
@@ -2203,7 +2203,8 @@ Void TEncSearch::estIntraPredQT(TComDataCU* pcCU,
         UInt uiStride      = pcPredYuv->getStride();
         UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];
         Int numModesForFullRD = g_aucIntraModeNumFast[uiWidthBit];
-        x265::pixelcmp sa8d = x265::primitives.sa8d[(int)g_aucConvertToBit[uiWidth]];
+        Int nLog2SizeMinus2 = g_aucConvertToBit[uiWidth];
+        x265::pixelcmp sa8d = x265::primitives.sa8d[nLog2SizeMinus2];
 
         Bool doFastSearch = (numModesForFullRD != numModesAvailable);
         if (doFastSearch)
@@ -2253,7 +2254,7 @@ Void TEncSearch::estIntraPredQT(TComDataCU* pcCU,
                 Pel *pLeft0  = refLeft     + uiWidth - 1;
                 Pel *pLeft1  = refLeftFlt  + uiWidth - 1;
 
-                x265::primitives.getIPredAngs4((pixel*)tmp, (pixel*)pAbove0, (pixel*)pLeft0, (pixel*)pAbove1, (pixel*)pLeft1, (uiWidth<16));
+                x265::primitives.getIPredAngs[nLog2SizeMinus2]((pixel*)tmp, (pixel*)pAbove0, (pixel*)pLeft0, (pixel*)pAbove1, (pixel*)pLeft1, (uiWidth<16));
 
                 // TODO: We need SATD_x4 here
                 for (UInt uiMode = 2; uiMode < numModesAvailable; uiMode++)
diff --git a/source/common/IntraPred.cpp b/source/common/IntraPred.cpp
index f76a7f7..54f5c4f 100644
--- a/source/common/IntraPred.cpp
+++ b/source/common/IntraPred.cpp
@@ -258,6 +258,7 @@ unsigned char g_aucIntraFilterType[][35] = {
 };
 #endif
 
+template<int size>
 void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1, pixel *pLeft1, bool bLuma)
 {
     int iMode;
@@ -270,9 +271,9 @@ void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1
     {
         pixel *pLeft = pLeft0;
         pixel *pAbove = pAbove0;
-        pixel *pDst = pDst0 + (iMode-2) * (4 * 4);
+        pixel *pDst = pDst0 + (iMode-2) * (size * size);
 
-        xPredIntraAngBufRef(8, pDst, 4, 4, iMode, bLuma, pLeft, pAbove);
+        xPredIntraAngBufRef(8, pDst, size, size, iMode, bLuma, pLeft, pAbove);
 
         // Optimize code don't flip buffer
         bool modeHor = (iMode < 18);
@@ -280,14 +281,13 @@ void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1
         if (modeHor)
         {
             pixel  tmp;
-            const int width = 4;
-            for (int k = 0; k < width - 1; k++)
+            for (int k = 0; k < size - 1; k++)
             {
-                for (int l = k + 1; l < width; l++)
+                for (int l = k + 1; l < size; l++)
                 {
-                    tmp                 = pDst[k * width + l];
-                    pDst[k * width + l] = pDst[l * width + k];
-                    pDst[l * width + k] = tmp;
+                    tmp                = pDst[k * size + l];
+                    pDst[k * size + l] = pDst[l * size + k];
+                    pDst[l * size + k] = tmp;
                 }
             }
         }
@@ -304,6 +304,10 @@ void Setup_C_IPredPrimitives(EncoderPrimitives& p)
     p.getIPredDC = xPredIntraDC;
     p.getIPredPlanar = xPredIntraPlanar;
     p.getIPredAng = xPredIntraAngBufRef;
-    p.getIPredAngs4 = xPredIntraAngs4;
+    p.getIPredAngs[0] = xPredIntraAngs4<4>;
+    p.getIPredAngs[1] = xPredIntraAngs4<8>;
+    p.getIPredAngs[2] = xPredIntraAngs4<16>;
+    p.getIPredAngs[3] = xPredIntraAngs4<32>;
+    p.getIPredAngs[4] = xPredIntraAngs4<64>;
 }
 }
diff --git a/source/common/primitives.h b/source/common/primitives.h
index 9b57ca2..8e646a0 100644
--- a/source/common/primitives.h
+++ b/source/common/primitives.h
@@ -236,7 +236,7 @@ struct EncoderPrimitives
     getIPredDC_t getIPredDC;
     getIPredPlanar_t getIPredPlanar;
     getIPredAng_p getIPredAng;
-    getIPredAngs_t getIPredAngs4;
+    getIPredAngs_t getIPredAngs[5];
     quant deQuant;
     dct_t dct[NUM_DCTS];
     idct_t idct[NUM_IDCTS];
diff --git a/source/common/vec/intrapred.inc b/source/common/vec/intrapred.inc
index 829444f..cf76ca2 100644
--- a/source/common/vec/intrapred.inc
+++ b/source/common/vec/intrapred.inc
@@ -4665,10 +4665,9 @@ void xPredIntraAngBufRef(int bitDepth, pixel* pDst, int dstStride, int width, in
     }
 }
 
-#if HIGH_BIT_DEPTH || (INSTRSET < 4)
-
 // TODO: reference code, please optimize it
-void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1, pixel *pLeft1, bool bLuma)
+template<int size>
+void xPredIntraAngs(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1, pixel *pLeft1, bool bLuma)
 {
     int iMode;
 
@@ -4680,8 +4679,8 @@ void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1
     {
         pixel *pLeft = pLeft0;
         pixel *pAbove = pAbove0;
-        pixel *pDst = pDst0 + (iMode-2) * (4 * 4);
-        xPredIntraAngBufRef(8, pDst, 4, 4, iMode, bLuma, pLeft, pAbove);
+        pixel *pDst = pDst0 + (iMode-2) * (size * size);
+        xPredIntraAngBufRef(8, pDst, size, size, iMode, bLuma, pLeft, pAbove);
 
         // Optimize code don't flip buffer
         bool modeHor = (iMode < 18);
@@ -4689,22 +4688,19 @@ void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1
         if (modeHor)
         {
             pixel  tmp;
-            const int width = 4;
-            for (int k = 0; k < width - 1; k++)
+            for (int k = 0; k < size - 1; k++)
             {
-                for (int l = k + 1; l < width; l++)
+                for (int l = k + 1; l < size; l++)
                 {
-                    tmp                 = pDst[k * width + l];
-                    pDst[k * width + l] = pDst[l * width + k];
-                    pDst[l * width + k] = tmp;
+                    tmp                = pDst[k * size + l];
+                    pDst[k * size + l] = pDst[l * size + k];
+                    pDst[l * size + k] = tmp;
                 }
             }
         }
     }
 }
 
-#else // HIGH_BIT_DEPTH || (INSTRSET < 4)
-
 ALIGN_VAR_32(static const unsigned char, tab_angle_0[][16]) =
 {
     { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 },         //  0
@@ -5111,8 +5107,6 @@ void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1
     _mm_store_si128((__m128i*)pDstN[16], T30);
 }
 
-#endif // HIGH_BIT_DEPTH || (INSTRSET < 4)
-
 }
 
 #include "utils.h"
@@ -5124,8 +5118,19 @@ void NAME(Setup_Vec_IPredPrimitives)(EncoderPrimitives& p)
     p.getIPredDC = predIntraDC;
     p.getIPredPlanar = predIntraPlanar;
     p.getIPredAng = xPredIntraAngBufRef;
-#if !HIGH_BIT_DEPTH
-    p.getIPredAngs4 = xPredIntraAngs4;
+
+#if HIGH_BIT_DEPTH || (INSTRSET < 4)
+    p.getIPredAngs[0] = xPredIntraAngs<4>;
+    p.getIPredAngs[1] = xPredIntraAngs<8>;
+    p.getIPredAngs[2] = xPredIntraAngs<16>;
+    p.getIPredAngs[3] = xPredIntraAngs<32>;
+    p.getIPredAngs[4] = xPredIntraAngs<64>;
+#else
+    p.getIPredAngs[0] = xPredIntraAngs4;
+    p.getIPredAngs[1] = xPredIntraAngs<8>;
+    p.getIPredAngs[2] = xPredIntraAngs<16>;
+    p.getIPredAngs[3] = xPredIntraAngs<32>;
+    p.getIPredAngs[4] = xPredIntraAngs<64>;
 #endif
 }
 
diff --git a/source/test/intrapredharness.cpp b/source/test/intrapredharness.cpp
index 42b6dbe..0380cc3 100644
--- a/source/test/intrapredharness.cpp
+++ b/source/test/intrapredharness.cpp
@@ -171,14 +171,18 @@ bool IntraPredHarness::check_getIPredAng_primitive(x265::getIPredAng_p ref, x265
     return true;
 }
 
-bool IntraPredHarness::check_getIPredAngs4_primitive(x265::getIPredAngs_t ref, x265::getIPredAngs_t opt)
+bool IntraPredHarness::check_getIPredAngs_primitive(const x265::getIPredAngs_t ref[], const x265::getIPredAngs_t opt[])
 {
     int j = ADI_BUF_STRIDE;
 
     Bool isLuma;
 
-    for (int width = 4; width <= 4; width <<= 1)
+    for (int size = 2; size <= 5; size++)
     {
+        if (opt[size-2] == NULL) continue;
+
+        const int width = (1<<size);
+
         for (int i = 0; i <= 100; i++)
         {
             isLuma = (width <= 16) && (rand()%2);
@@ -196,8 +200,8 @@ bool IntraPredHarness::check_getIPredAngs4_primitive(x265::getIPredAngs_t ref, x
             memset(pixel_out_33_C, 0xCD, out_size);
 #endif
 
-            ref(pixel_out_33_C,   refAbove0, refLeft0, refAbove1, refLeft1, isLuma);
-            opt(pixel_out_33_Vec, refAbove0, refLeft0, refAbove1, refLeft1, isLuma);
+            ref[size-2](pixel_out_33_C,   refAbove0, refLeft0, refAbove1, refLeft1, isLuma);
+            opt[size-2](pixel_out_33_Vec, refAbove0, refLeft0, refAbove1, refLeft1, isLuma);
             for (int p = 2-2; p <= 34-2; p++)
             {
                 for (int k = 0; k < width; k++)
@@ -243,11 +247,11 @@ bool IntraPredHarness::testCorrectness(const EncoderPrimitives& ref, const Encod
             return false;
         }
     }
-    if (opt.getIPredAngs4)
+    if (opt.getIPredAngs[0])
     {
-        if (!check_getIPredAngs4_primitive(ref.getIPredAngs4, opt.getIPredAngs4))
+        if (!check_getIPredAngs_primitive(ref.getIPredAngs, opt.getIPredAngs))
         {
-            printf("intrapred_angular_4x4_33_modes failed\n");
+            printf("intrapred_angular_33_modes failed\n");
             return false;
         }
     }
@@ -297,18 +301,21 @@ void IntraPredHarness::measureSpeed(const EncoderPrimitives& ref, const EncoderP
             }
         }
     }
-    if (opt.getIPredAngs4)
+    for (int size = 2; size <= 6; size++)
     {
-        for (int ii = 4; ii <= 4; ii <<= 1)
+        if (opt.getIPredAngs[size-2])
         {
-            width = ii;
-            bool bFilter  = (width <= 16);
-            pixel * refAbove = pixel_buff + srcStride;
-            pixel * refLeft = refAbove + 3 * width;
-            refLeft[0] = refAbove[0];
-            printf("IPred_getIPredAngs4\t\t");
-            REPORT_SPEEDUP(opt.getIPredAngs4, ref.getIPredAngs4,
-                           pixel_out_33_Vec, refAbove, refLeft, refAbove, refLeft, bFilter);
+            for (int ii = 4; ii <= 4; ii <<= 1)
+            {
+                width = ii;
+                bool bFilter  = (width <= 16);
+                pixel * refAbove = pixel_buff + srcStride;
+                pixel * refLeft = refAbove + 3 * width;
+                refLeft[0] = refAbove[0];
+                printf("IPred_getIPredAngs%d\t\t", (1<<size));
+                REPORT_SPEEDUP(opt.getIPredAngs[size-2], ref.getIPredAngs[size-2],
+                               pixel_out_33_Vec, refAbove, refLeft, refAbove, refLeft, bFilter);
+            }
         }
     }
 }
diff --git a/source/test/intrapredharness.h b/source/test/intrapredharness.h
index 9c144f1..560c11a 100644
--- a/source/test/intrapredharness.h
+++ b/source/test/intrapredharness.h
@@ -46,7 +46,7 @@ protected:
     bool check_getIPredDC_primitive(x265::getIPredDC_t ref, x265::getIPredDC_t opt);
     bool check_getIPredPlanar_primitive(x265::getIPredPlanar_t ref, x265::getIPredPlanar_t opt);
     bool check_getIPredAng_primitive(x265::getIPredAng_p ref, x265::getIPredAng_p opt);
-    bool check_getIPredAngs4_primitive(x265::getIPredAngs_t ref, x265::getIPredAngs_t opt);
+    bool check_getIPredAngs_primitive(const x265::getIPredAngs_t ref[], const x265::getIPredAngs_t opt[]);
 
 public:
 
-- 
1.7.9.msysgit.0




More information about the x265-devel mailing list