[x265] [PATCH 1/5] intrapred: implement IntraAngle mode for all sizes
Min Chen
chenm003 at 163.com
Thu Jun 20 18:53:11 CEST 2013
From d546dcea73466e33468489c5754c66667539f981 Mon Sep 17 00:00:00 2001
From: Min Chen <chenm003 at 163.com>
Date: Thu, 20 Jun 2013 16:14:02 +0800
Subject: [PATCH 1/5] intrapred: implement IntraAngle mode for all sizes
---
source/Lib/TLibEncoder/TEncSearch.cpp | 5 ++-
source/common/IntraPred.cpp | 22 ++++++++++-------
source/common/primitives.h | 2 +-
source/common/vec/intrapred.inc | 39 +++++++++++++++++-------------
source/test/intrapredharness.cpp | 41 +++++++++++++++++++-------------
source/test/intrapredharness.h | 2 +-
6 files changed, 64 insertions(+), 47 deletions(-)
diff --git a/source/Lib/TLibEncoder/TEncSearch.cpp b/source/Lib/TLibEncoder/TEncSearch.cpp
index 7654de7..0e6ddc9 100644
--- a/source/Lib/TLibEncoder/TEncSearch.cpp
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp
@@ -2203,7 +2203,8 @@ Void TEncSearch::estIntraPredQT(TComDataCU* pcCU,
UInt uiStride = pcPredYuv->getStride();
UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];
Int numModesForFullRD = g_aucIntraModeNumFast[uiWidthBit];
- x265::pixelcmp sa8d = x265::primitives.sa8d[(int)g_aucConvertToBit[uiWidth]];
+ Int nLog2SizeMinus2 = g_aucConvertToBit[uiWidth];
+ x265::pixelcmp sa8d = x265::primitives.sa8d[nLog2SizeMinus2];
Bool doFastSearch = (numModesForFullRD != numModesAvailable);
if (doFastSearch)
@@ -2253,7 +2254,7 @@ Void TEncSearch::estIntraPredQT(TComDataCU* pcCU,
Pel *pLeft0 = refLeft + uiWidth - 1;
Pel *pLeft1 = refLeftFlt + uiWidth - 1;
- x265::primitives.getIPredAngs4((pixel*)tmp, (pixel*)pAbove0, (pixel*)pLeft0, (pixel*)pAbove1, (pixel*)pLeft1, (uiWidth<16));
+ x265::primitives.getIPredAngs[nLog2SizeMinus2]((pixel*)tmp, (pixel*)pAbove0, (pixel*)pLeft0, (pixel*)pAbove1, (pixel*)pLeft1, (uiWidth<16));
// TODO: We need SATD_x4 here
for (UInt uiMode = 2; uiMode < numModesAvailable; uiMode++)
diff --git a/source/common/IntraPred.cpp b/source/common/IntraPred.cpp
index f76a7f7..54f5c4f 100644
--- a/source/common/IntraPred.cpp
+++ b/source/common/IntraPred.cpp
@@ -258,6 +258,7 @@ unsigned char g_aucIntraFilterType[][35] = {
};
#endif
+template<int size>
void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1, pixel *pLeft1, bool bLuma)
{
int iMode;
@@ -270,9 +271,9 @@ void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1
{
pixel *pLeft = pLeft0;
pixel *pAbove = pAbove0;
- pixel *pDst = pDst0 + (iMode-2) * (4 * 4);
+ pixel *pDst = pDst0 + (iMode-2) * (size * size);
- xPredIntraAngBufRef(8, pDst, 4, 4, iMode, bLuma, pLeft, pAbove);
+ xPredIntraAngBufRef(8, pDst, size, size, iMode, bLuma, pLeft, pAbove);
// Optimize code don't flip buffer
bool modeHor = (iMode < 18);
@@ -280,14 +281,13 @@ void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1
if (modeHor)
{
pixel tmp;
- const int width = 4;
- for (int k = 0; k < width - 1; k++)
+ for (int k = 0; k < size - 1; k++)
{
- for (int l = k + 1; l < width; l++)
+ for (int l = k + 1; l < size; l++)
{
- tmp = pDst[k * width + l];
- pDst[k * width + l] = pDst[l * width + k];
- pDst[l * width + k] = tmp;
+ tmp = pDst[k * size + l];
+ pDst[k * size + l] = pDst[l * size + k];
+ pDst[l * size + k] = tmp;
}
}
}
@@ -304,6 +304,10 @@ void Setup_C_IPredPrimitives(EncoderPrimitives& p)
p.getIPredDC = xPredIntraDC;
p.getIPredPlanar = xPredIntraPlanar;
p.getIPredAng = xPredIntraAngBufRef;
- p.getIPredAngs4 = xPredIntraAngs4;
+ p.getIPredAngs[0] = xPredIntraAngs4<4>;
+ p.getIPredAngs[1] = xPredIntraAngs4<8>;
+ p.getIPredAngs[2] = xPredIntraAngs4<16>;
+ p.getIPredAngs[3] = xPredIntraAngs4<32>;
+ p.getIPredAngs[4] = xPredIntraAngs4<64>;
}
}
diff --git a/source/common/primitives.h b/source/common/primitives.h
index 9b57ca2..8e646a0 100644
--- a/source/common/primitives.h
+++ b/source/common/primitives.h
@@ -236,7 +236,7 @@ struct EncoderPrimitives
getIPredDC_t getIPredDC;
getIPredPlanar_t getIPredPlanar;
getIPredAng_p getIPredAng;
- getIPredAngs_t getIPredAngs4;
+ getIPredAngs_t getIPredAngs[5];
quant deQuant;
dct_t dct[NUM_DCTS];
idct_t idct[NUM_IDCTS];
diff --git a/source/common/vec/intrapred.inc b/source/common/vec/intrapred.inc
index 829444f..cf76ca2 100644
--- a/source/common/vec/intrapred.inc
+++ b/source/common/vec/intrapred.inc
@@ -4665,10 +4665,9 @@ void xPredIntraAngBufRef(int bitDepth, pixel* pDst, int dstStride, int width, in
}
}
-#if HIGH_BIT_DEPTH || (INSTRSET < 4)
-
// TODO: reference code, please optimize it
-void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1, pixel *pLeft1, bool bLuma)
+template<int size>
+void xPredIntraAngs(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1, pixel *pLeft1, bool bLuma)
{
int iMode;
@@ -4680,8 +4679,8 @@ void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1
{
pixel *pLeft = pLeft0;
pixel *pAbove = pAbove0;
- pixel *pDst = pDst0 + (iMode-2) * (4 * 4);
- xPredIntraAngBufRef(8, pDst, 4, 4, iMode, bLuma, pLeft, pAbove);
+ pixel *pDst = pDst0 + (iMode-2) * (size * size);
+ xPredIntraAngBufRef(8, pDst, size, size, iMode, bLuma, pLeft, pAbove);
// Optimize code don't flip buffer
bool modeHor = (iMode < 18);
@@ -4689,22 +4688,19 @@ void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1
if (modeHor)
{
pixel tmp;
- const int width = 4;
- for (int k = 0; k < width - 1; k++)
+ for (int k = 0; k < size - 1; k++)
{
- for (int l = k + 1; l < width; l++)
+ for (int l = k + 1; l < size; l++)
{
- tmp = pDst[k * width + l];
- pDst[k * width + l] = pDst[l * width + k];
- pDst[l * width + k] = tmp;
+ tmp = pDst[k * size + l];
+ pDst[k * size + l] = pDst[l * size + k];
+ pDst[l * size + k] = tmp;
}
}
}
}
}
-#else // HIGH_BIT_DEPTH || (INSTRSET < 4)
-
ALIGN_VAR_32(static const unsigned char, tab_angle_0[][16]) =
{
{ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }, // 0
@@ -5111,8 +5107,6 @@ void xPredIntraAngs4(pixel *pDst0, pixel *pAbove0, pixel *pLeft0, pixel *pAbove1
_mm_store_si128((__m128i*)pDstN[16], T30);
}
-#endif // HIGH_BIT_DEPTH || (INSTRSET < 4)
-
}
#include "utils.h"
@@ -5124,8 +5118,19 @@ void NAME(Setup_Vec_IPredPrimitives)(EncoderPrimitives& p)
p.getIPredDC = predIntraDC;
p.getIPredPlanar = predIntraPlanar;
p.getIPredAng = xPredIntraAngBufRef;
-#if !HIGH_BIT_DEPTH
- p.getIPredAngs4 = xPredIntraAngs4;
+
+#if HIGH_BIT_DEPTH || (INSTRSET < 4)
+ p.getIPredAngs[0] = xPredIntraAngs<4>;
+ p.getIPredAngs[1] = xPredIntraAngs<8>;
+ p.getIPredAngs[2] = xPredIntraAngs<16>;
+ p.getIPredAngs[3] = xPredIntraAngs<32>;
+ p.getIPredAngs[4] = xPredIntraAngs<64>;
+#else
+ p.getIPredAngs[0] = xPredIntraAngs4;
+ p.getIPredAngs[1] = xPredIntraAngs<8>;
+ p.getIPredAngs[2] = xPredIntraAngs<16>;
+ p.getIPredAngs[3] = xPredIntraAngs<32>;
+ p.getIPredAngs[4] = xPredIntraAngs<64>;
#endif
}
diff --git a/source/test/intrapredharness.cpp b/source/test/intrapredharness.cpp
index 42b6dbe..0380cc3 100644
--- a/source/test/intrapredharness.cpp
+++ b/source/test/intrapredharness.cpp
@@ -171,14 +171,18 @@ bool IntraPredHarness::check_getIPredAng_primitive(x265::getIPredAng_p ref, x265
return true;
}
-bool IntraPredHarness::check_getIPredAngs4_primitive(x265::getIPredAngs_t ref, x265::getIPredAngs_t opt)
+bool IntraPredHarness::check_getIPredAngs_primitive(const x265::getIPredAngs_t ref[], const x265::getIPredAngs_t opt[])
{
int j = ADI_BUF_STRIDE;
Bool isLuma;
- for (int width = 4; width <= 4; width <<= 1)
+ for (int size = 2; size <= 5; size++)
{
+ if (opt[size-2] == NULL) continue;
+
+ const int width = (1<<size);
+
for (int i = 0; i <= 100; i++)
{
isLuma = (width <= 16) && (rand()%2);
@@ -196,8 +200,8 @@ bool IntraPredHarness::check_getIPredAngs4_primitive(x265::getIPredAngs_t ref, x
memset(pixel_out_33_C, 0xCD, out_size);
#endif
- ref(pixel_out_33_C, refAbove0, refLeft0, refAbove1, refLeft1, isLuma);
- opt(pixel_out_33_Vec, refAbove0, refLeft0, refAbove1, refLeft1, isLuma);
+ ref[size-2](pixel_out_33_C, refAbove0, refLeft0, refAbove1, refLeft1, isLuma);
+ opt[size-2](pixel_out_33_Vec, refAbove0, refLeft0, refAbove1, refLeft1, isLuma);
for (int p = 2-2; p <= 34-2; p++)
{
for (int k = 0; k < width; k++)
@@ -243,11 +247,11 @@ bool IntraPredHarness::testCorrectness(const EncoderPrimitives& ref, const Encod
return false;
}
}
- if (opt.getIPredAngs4)
+ if (opt.getIPredAngs[0])
{
- if (!check_getIPredAngs4_primitive(ref.getIPredAngs4, opt.getIPredAngs4))
+ if (!check_getIPredAngs_primitive(ref.getIPredAngs, opt.getIPredAngs))
{
- printf("intrapred_angular_4x4_33_modes failed\n");
+ printf("intrapred_angular_33_modes failed\n");
return false;
}
}
@@ -297,18 +301,21 @@ void IntraPredHarness::measureSpeed(const EncoderPrimitives& ref, const EncoderP
}
}
}
- if (opt.getIPredAngs4)
+ for (int size = 2; size <= 6; size++)
{
- for (int ii = 4; ii <= 4; ii <<= 1)
+ if (opt.getIPredAngs[size-2])
{
- width = ii;
- bool bFilter = (width <= 16);
- pixel * refAbove = pixel_buff + srcStride;
- pixel * refLeft = refAbove + 3 * width;
- refLeft[0] = refAbove[0];
- printf("IPred_getIPredAngs4\t\t");
- REPORT_SPEEDUP(opt.getIPredAngs4, ref.getIPredAngs4,
- pixel_out_33_Vec, refAbove, refLeft, refAbove, refLeft, bFilter);
+ for (int ii = 4; ii <= 4; ii <<= 1)
+ {
+ width = ii;
+ bool bFilter = (width <= 16);
+ pixel * refAbove = pixel_buff + srcStride;
+ pixel * refLeft = refAbove + 3 * width;
+ refLeft[0] = refAbove[0];
+ printf("IPred_getIPredAngs%d\t\t", (1<<size));
+ REPORT_SPEEDUP(opt.getIPredAngs[size-2], ref.getIPredAngs[size-2],
+ pixel_out_33_Vec, refAbove, refLeft, refAbove, refLeft, bFilter);
+ }
}
}
}
diff --git a/source/test/intrapredharness.h b/source/test/intrapredharness.h
index 9c144f1..560c11a 100644
--- a/source/test/intrapredharness.h
+++ b/source/test/intrapredharness.h
@@ -46,7 +46,7 @@ protected:
bool check_getIPredDC_primitive(x265::getIPredDC_t ref, x265::getIPredDC_t opt);
bool check_getIPredPlanar_primitive(x265::getIPredPlanar_t ref, x265::getIPredPlanar_t opt);
bool check_getIPredAng_primitive(x265::getIPredAng_p ref, x265::getIPredAng_p opt);
- bool check_getIPredAngs4_primitive(x265::getIPredAngs_t ref, x265::getIPredAngs_t opt);
+ bool check_getIPredAngs_primitive(const x265::getIPredAngs_t ref[], const x265::getIPredAngs_t opt[]);
public:
--
1.7.9.msysgit.0
More information about the x265-devel mailing list