[x265] [PATCH] primitive: rename functions and add testharness code
rajesh at multicorewareinc.com
rajesh at multicorewareinc.com
Tue Feb 3 06:23:03 CET 2015
# HG changeset patch
# User Rajesh Paulraj
# Date 1422940886 -19800
# Tue Feb 03 10:51:26 2015 +0530
# Node ID c27446474b2d4fc14d51e4bb2f002f0e39eb4c92
# Parent f189b9328d93694e182356b18275d537c987b8bd
primitive: rename functions and add testharness code
diff -r f189b9328d93 -r c27446474b2d source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp Mon Feb 02 17:08:08 2015 -0600
+++ b/source/common/ipfilter.cpp Tue Feb 03 10:51:26 2015 +0530
@@ -34,8 +34,27 @@
#endif
namespace {
+/* Convert a width x height block of pixels into 16-bit intermediate samples.
+ *
+ * Each source pixel is shifted left by (IF_INTERNAL_PREC - X265_DEPTH) and
+ * then has IF_INTERNAL_OFFS subtracted from it.  Block dimensions and the
+ * destination stride are template parameters; only the source stride is a
+ * run-time argument.
+ *
+ * src       - top-left pixel of the source block
+ * srcStride - distance, in pixels, between successive source rows
+ * dst       - top-left sample of the output block; rows are dstStride apart */
+template<int dstStride, int width, int height>
+void pixelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst)
+{
+    const int shift = IF_INTERNAL_PREC - X265_DEPTH;
+
+    // Both pointers step to the next row at the end of every outer iteration.
+    for (int y = 0; y < height; y++, src += srcStride, dst += dstStride)
+    {
+        for (int x = 0; x < width; x++)
+        {
+            int16_t scaled = src[x] << shift;
+            dst[x] = scaled - (int16_t)IF_INTERNAL_OFFS;
+        }
+    }
+}
+
template<int dstStride>
-void filterConvertPelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height)
+void filterPixelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height)
{
int shift = IF_INTERNAL_PREC - X265_DEPTH;
int row, col;
@@ -378,7 +397,8 @@
p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>; \
p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
- p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].chroma_p2s = pixelToShort_c<MAX_CU_SIZE / 2, W, H>;
#define CHROMA_422(W, H) \
p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
@@ -386,7 +406,8 @@
p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>; \
p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
- p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].chroma_p2s = pixelToShort_c<MAX_CU_SIZE / 2, W, H>;
#define CHROMA_444(W, H) \
p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
@@ -394,7 +415,8 @@
p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>; \
p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
- p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>;
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_ ## W ## x ## H].chroma_p2s = pixelToShort_c<MAX_CU_SIZE, W, H>;
#define LUMA(W, H) \
p.pu[LUMA_ ## W ## x ## H].luma_hpp = interp_horiz_pp_c<8, W, H>; \
@@ -403,7 +425,8 @@
p.pu[LUMA_ ## W ## x ## H].luma_vps = interp_vert_ps_c<8, W, H>; \
p.pu[LUMA_ ## W ## x ## H].luma_vsp = interp_vert_sp_c<8, W, H>; \
p.pu[LUMA_ ## W ## x ## H].luma_vss = interp_vert_ss_c<8, W, H>; \
- p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_hv_pp_c<8, W, H>;
+ p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_hv_pp_c<8, W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].filter_p2s = pixelToShort_c<MAX_CU_SIZE, W, H>
void setupFilterPrimitives_c(EncoderPrimitives& p)
{
@@ -507,11 +530,11 @@
CHROMA_444(48, 64);
CHROMA_444(64, 16);
CHROMA_444(16, 64);
- p.luma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;
+ p.luma_p2s = filterPixelToShort_c<MAX_CU_SIZE>;
- p.chroma[X265_CSP_I444].p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;
- p.chroma[X265_CSP_I420].p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
- p.chroma[X265_CSP_I422].p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
+ p.chroma[X265_CSP_I444].p2s = filterPixelToShort_c<MAX_CU_SIZE>;
+ p.chroma[X265_CSP_I420].p2s = filterPixelToShort_c<MAX_CU_SIZE / 2>;
+ p.chroma[X265_CSP_I422].p2s = filterPixelToShort_c<MAX_CU_SIZE / 2>;
p.extendRowBorder = extendCURowColBorder;
}
diff -r f189b9328d93 -r c27446474b2d source/common/primitives.cpp
--- a/source/common/primitives.cpp Mon Feb 02 17:08:08 2015 -0600
+++ b/source/common/primitives.cpp Tue Feb 03 10:51:26 2015 +0530
@@ -98,6 +98,7 @@
p.chroma[X265_CSP_I444].pu[i].copy_pp = p.pu[i].copy_pp;
p.chroma[X265_CSP_I444].pu[i].addAvg = p.pu[i].addAvg;
p.chroma[X265_CSP_I444].pu[i].satd = p.pu[i].satd;
+ p.chroma[X265_CSP_I444].pu[i].chroma_p2s = p.pu[i].filter_p2s;
}
for (int i = 0; i < NUM_CU_SIZES; i++)
@@ -168,6 +169,36 @@
p.chroma[X265_CSP_I422].cu[BLOCK_422_2x4].sa8d = NULL;
p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].sa8d = p.pu[LUMA_4x8].satd;
+ /* Chroma PU can often use filter_p2s primitives */
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_4x4].chroma_p2s = p.pu[LUMA_4x4].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_8x8].chroma_p2s = p.pu[LUMA_8x8].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_16x16].chroma_p2s = p.pu[LUMA_16x16].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_32x32].chroma_p2s = p.pu[LUMA_32x32].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_64x64].chroma_p2s = p.pu[LUMA_64x64].filter_p2s;
+
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_8x4].chroma_p2s = p.pu[LUMA_8x4].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_4x8].chroma_p2s = p.pu[LUMA_4x8].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_16x8].chroma_p2s = p.pu[LUMA_16x8].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_8x16].chroma_p2s = p.pu[LUMA_8x16].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_32x16].chroma_p2s = p.pu[LUMA_32x16].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_16x32].chroma_p2s = p.pu[LUMA_16x32].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_64x32].chroma_p2s = p.pu[LUMA_64x32].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_32x64].chroma_p2s = p.pu[LUMA_32x64].filter_p2s;
+
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_16x12].chroma_p2s = p.pu[LUMA_16x12].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_12x16].chroma_p2s = p.pu[LUMA_12x16].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_16x4].chroma_p2s = p.pu[LUMA_16x4].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_4x16].chroma_p2s = p.pu[LUMA_4x16].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_32x24].chroma_p2s = p.pu[LUMA_32x24].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_24x32].chroma_p2s = p.pu[LUMA_24x32].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_32x8].chroma_p2s = p.pu[LUMA_32x8].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_8x32].chroma_p2s = p.pu[LUMA_8x32].filter_p2s;
+
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_64x48].chroma_p2s = p.pu[LUMA_64x48].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_48x64].chroma_p2s = p.pu[LUMA_48x64].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_64x16].chroma_p2s = p.pu[LUMA_64x16].filter_p2s;
+ p.chroma[X265_CSP_I444].pu[CHROMA_444_16x64].chroma_p2s = p.pu[LUMA_16x64].filter_p2s;
+
/* alias CU copy_pp from square PU copy_pp */
for (int i = 0; i < NUM_CU_SIZES; i++)
{
diff -r f189b9328d93 -r c27446474b2d source/common/primitives.h
--- a/source/common/primitives.h Mon Feb 02 17:08:08 2015 -0600
+++ b/source/common/primitives.h Tue Feb 03 10:51:26 2015 +0530
@@ -110,6 +110,21 @@
BLOCK_422_32x64
};
+/* Partition-size indices for 4:4:4 chroma prediction units.  The values are
+ * used as indices into p.chroma[X265_CSP_I444].pu[].
+ * NOTE(review): the same array is also indexed with LUMA_WxH names elsewhere
+ * in this patch, so the order here presumably has to mirror the luma PU
+ * enum exactly -- confirm before reordering or inserting entries. */
+enum ChromaPU444
+{
+ // Square (the first 5 PUs match the CU sizes)
+ CHROMA_444_4x4, CHROMA_444_8x8, CHROMA_444_16x16, CHROMA_444_32x32, CHROMA_444_64x64,
+ // Rectangular
+ CHROMA_444_8x4, CHROMA_444_4x8,
+ CHROMA_444_16x8, CHROMA_444_8x16,
+ CHROMA_444_32x16, CHROMA_444_16x32,
+ CHROMA_444_64x32, CHROMA_444_32x64,
+ // Asymmetrical (0.75, 0.25)
+ CHROMA_444_16x12, CHROMA_444_12x16, CHROMA_444_16x4, CHROMA_444_4x16,
+ CHROMA_444_32x24, CHROMA_444_24x32, CHROMA_444_32x8, CHROMA_444_8x32,
+ CHROMA_444_64x48, CHROMA_444_48x64, CHROMA_444_64x16, CHROMA_444_16x64,
+};
+
typedef int (*pixelcmp_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned
typedef int (*pixelcmp_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);
typedef int (*pixel_ssd_s_t)(const int16_t* fenc, intptr_t fencstride);
@@ -155,7 +170,8 @@
typedef void (*filter_sp_t) (const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
typedef void (*filter_ss_t) (const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
typedef void (*filter_hv_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
-typedef void (*filter_p2s_t)(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height);
+typedef void (*filter_p2s_wxh_t)(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height);
+typedef void (*filter_p2s_t)(const pixel* src, intptr_t srcStride, int16_t* dst);
typedef void (*copy_pp_t)(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride); // dst is aligned
typedef void (*copy_sp_t)(pixel* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
@@ -207,6 +223,7 @@
addAvg_t addAvg; // bidir motion compensation, uses 16bit values
copy_pp_t copy_pp;
+ filter_p2s_t filter_p2s;
}
pu[NUM_PU_SIZES];
@@ -286,7 +303,7 @@
weightp_sp_t weight_sp;
weightp_pp_t weight_pp;
- filter_p2s_t luma_p2s;
+ filter_p2s_wxh_t luma_p2s;
/* There is one set of chroma primitives per color space. An encoder will
* have just a single color space and thus it will only ever use one entry
@@ -311,6 +328,8 @@
filter_hps_t filter_hps;
addAvg_t addAvg;
copy_pp_t copy_pp;
+ filter_p2s_t chroma_p2s;
+
}
pu[NUM_PU_SIZES];
@@ -329,7 +348,7 @@
}
cu[NUM_CU_SIZES];
- filter_p2s_t p2s; // takes width/height as arguments
+ filter_p2s_wxh_t p2s; // takes width/height as arguments
}
chroma[X265_CSP_COUNT];
};
diff -r f189b9328d93 -r c27446474b2d source/test/ipfilterharness.cpp
--- a/source/test/ipfilterharness.cpp Mon Feb 02 17:08:08 2015 -0600
+++ b/source/test/ipfilterharness.cpp Tue Feb 03 10:51:26 2015 +0530
@@ -61,7 +61,7 @@
}
}
-bool IPFilterHarness::check_IPFilter_primitive(filter_p2s_t ref, filter_p2s_t opt, int isChroma, int csp)
+bool IPFilterHarness::check_IPFilter_primitive(filter_p2s_wxh_t ref, filter_p2s_wxh_t opt, int isChroma, int csp)
{
intptr_t rand_srcStride;
int min_size = isChroma ? 2 : 4;
@@ -512,6 +512,46 @@
return true;
}
+/* Check an optimized fixed-size luma pixel-to-short primitive against the
+ * reference implementation.
+ *
+ * Runs ITERS iterations.  Each iteration picks a random source stride in
+ * [0, 99], offsets the source pointer by i into one of the TEST_CASES pixel
+ * buffers, runs both implementations on that input and compares the two
+ * output buffers.
+ *
+ * ref - reference implementation, called directly
+ * opt - implementation under test, invoked through checked()
+ *
+ * Returns false at the first mismatch, true if every iteration matched. */
+bool IPFilterHarness::check_IPFilterLumaP2S_primitive(filter_p2s_t ref, filter_p2s_t opt)
+{
+    for (int i = 0; i < ITERS; i++)
+    {
+        intptr_t rand_srcStride = rand() % 100;
+        int index = i % TEST_CASES;
+
+        ref(pixel_test_buff[index] + i, rand_srcStride, IPF_C_output_s);
+
+        checked(opt, pixel_test_buff[index] + i, rand_srcStride, IPF_vec_output_s);
+
+        // The primitives write int16_t samples, so the compare length must be
+        // counted in int16_t units.  sizeof(pixel) would leave part of the
+        // output unchecked whenever pixel is narrower than int16_t.
+        if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE * sizeof(int16_t)))
+            return false;
+
+        reportfail();
+    }
+
+    return true;
+}
+
+/* Check an optimized fixed-size chroma pixel-to-short primitive against the
+ * reference implementation.
+ *
+ * Runs ITERS iterations.  Each iteration picks a random source stride in
+ * [0, 99], offsets the source pointer by i into one of the TEST_CASES pixel
+ * buffers, runs both implementations on that input and compares the two
+ * output buffers.
+ *
+ * ref - reference implementation, called directly
+ * opt - implementation under test, invoked through checked()
+ *
+ * Returns false at the first mismatch, true if every iteration matched. */
+bool IPFilterHarness::check_IPFilterChromaP2S_primitive(filter_p2s_t ref, filter_p2s_t opt)
+{
+    for (int i = 0; i < ITERS; i++)
+    {
+        intptr_t rand_srcStride = rand() % 100;
+        int index = i % TEST_CASES;
+
+        ref(pixel_test_buff[index] + i, rand_srcStride, IPF_C_output_s);
+
+        checked(opt, pixel_test_buff[index] + i, rand_srcStride, IPF_vec_output_s);
+
+        // The primitives write int16_t samples, so the compare length must be
+        // counted in int16_t units.  sizeof(pixel) would leave part of the
+        // output unchecked whenever pixel is narrower than int16_t.
+        if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE * sizeof(int16_t)))
+            return false;
+
+        reportfail();
+    }
+
+    return true;
+}
+
bool IPFilterHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
{
if (opt.luma_p2s)
@@ -582,6 +622,14 @@
return false;
}
}
+ if (opt.pu[value].filter_p2s)
+ {
+ if (!check_IPFilterLumaP2S_primitive(ref.pu[value].filter_p2s, opt.pu[value].filter_p2s))
+ {
+ printf("filter_p2s[%s]", lumaPartStr[value]);
+ return false;
+ }
+ }
}
for (int csp = X265_CSP_I420; csp < X265_CSP_COUNT; csp++)
@@ -644,6 +692,14 @@
return false;
}
}
+ if (opt.chroma[csp].pu[value].chroma_p2s)
+ {
+ if (!check_IPFilterChromaP2S_primitive(ref.chroma[csp].pu[value].chroma_p2s, opt.chroma[csp].pu[value].chroma_p2s))
+ {
+ printf("chroma_p2s[%s]", chromaPartStr[csp][value]);
+ return false;
+ }
+ }
}
}
@@ -720,6 +776,13 @@
REPORT_SPEEDUP(opt.pu[value].luma_hvpp, ref.pu[value].luma_hvpp,
pixel_buff + 3 * srcStride, srcStride, IPF_vec_output_p, srcStride, 1, 3);
}
+
+ if (opt.pu[value].filter_p2s)
+ {
+ printf("filter_p2s [%s]\t", lumaPartStr[value]);
+ REPORT_SPEEDUP(opt.pu[value].filter_p2s, ref.pu[value].filter_p2s,
+ pixel_buff, srcStride, IPF_vec_output_s);
+ }
}
for (int csp = X265_CSP_I420; csp < X265_CSP_COUNT; csp++)
@@ -773,6 +836,14 @@
short_buff + maxVerticalfilterHalfDistance * srcStride, srcStride,
IPF_vec_output_s, dstStride, 1);
}
+
+ if (opt.chroma[csp].pu[value].chroma_p2s)
+ {
+ printf("chroma_p2s[%s]\t", chromaPartStr[csp][value]);
+ REPORT_SPEEDUP(opt.chroma[csp].pu[value].chroma_p2s, ref.chroma[csp].pu[value].chroma_p2s,
+ pixel_buff, srcStride,
+ IPF_vec_output_s);
+ }
}
}
}
diff -r f189b9328d93 -r c27446474b2d source/test/ipfilterharness.h
--- a/source/test/ipfilterharness.h Mon Feb 02 17:08:08 2015 -0600
+++ b/source/test/ipfilterharness.h Tue Feb 03 10:51:26 2015 +0530
@@ -50,7 +50,7 @@
pixel pixel_test_buff[TEST_CASES][TEST_BUF_SIZE];
int16_t short_test_buff[TEST_CASES][TEST_BUF_SIZE];
- bool check_IPFilter_primitive(filter_p2s_t ref, filter_p2s_t opt, int isChroma, int csp);
+ bool check_IPFilter_primitive(filter_p2s_wxh_t ref, filter_p2s_wxh_t opt, int isChroma, int csp);
bool check_IPFilterChroma_primitive(filter_pp_t ref, filter_pp_t opt);
bool check_IPFilterChroma_ps_primitive(filter_ps_t ref, filter_ps_t opt);
bool check_IPFilterChroma_hps_primitive(filter_hps_t ref, filter_hps_t opt);
@@ -62,6 +62,8 @@
bool check_IPFilterLuma_sp_primitive(filter_sp_t ref, filter_sp_t opt);
bool check_IPFilterLuma_ss_primitive(filter_ss_t ref, filter_ss_t opt);
bool check_IPFilterLumaHV_primitive(filter_hv_pp_t ref, filter_hv_pp_t opt);
+ bool check_IPFilterLumaP2S_primitive(filter_p2s_t ref, filter_p2s_t opt);
+ bool check_IPFilterChromaP2S_primitive(filter_p2s_t ref, filter_p2s_t opt);
public:
More information about the x265-devel
mailing list