[x265] [PATCH 2 of 2] Enable 10 bpp asm filter functions
Steve Borho
steve at borho.org
Tue Feb 25 21:49:23 CET 2014
On Tue, Feb 25, 2014 at 5:39 AM, <nabajit at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Nabajit Deka
> # Date 1393328202 -19800
> # Tue Feb 25 17:06:42 2014 +0530
> # Node ID 41a3689f2a07fa86568e07aab75dd31dd59da4a8
> # Parent c9236d867a07b18d0e28bd39528a02bf03cf4eda
> Enable 10 bpp asm filter functions
>
> diff -r c9236d867a07 -r 41a3689f2a07 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Tue Feb 25 17:04:43 2014 +0530
> +++ b/source/common/x86/asm-primitives.cpp Tue Feb 25 17:06:42 2014 +0530
> @@ -387,12 +387,18 @@
> SETUP_CHROMA_SS_FUNC_DEF_444(32, 8, cpu); \
> SETUP_CHROMA_SS_FUNC_DEF_444(8, 32, cpu);
>
> +#ifdef HIGH_BIT_DEPTH //temporary, for test purpose only
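This commit caused a drastic slowdown for 8bpp builds, and it took me a
while to figure out why. This needs to be #if HIGH_BIT_DEPTH, not #ifdef:
8bpp builds have -DHIGH_BIT_DEPTH=0, so the macro is defined and the #ifdef
branch is always taken. As a result, 8bpp builds got the reduced
SETUP_LUMA_FUNC_DEF below and lost the luma_vpp, luma_vps and luma_copy_ps
assembly assignments from the #else branch.
Also, the comment is wrong at this point, so I've updated it.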
> +#define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
> + p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
> + p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu;
> +#else
> #define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
> p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
> p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
> p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
> p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
> p.luma_copy_ps[LUMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W ## x ## H ## cpu;
> +#endif
>
> #define SETUP_LUMA_SUB_FUNC_DEF(W, H, cpu) \
> p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
> @@ -835,6 +841,7 @@
> {
> LUMA_ADDAVG(_sse4);
> CHROMA_ADDAVG(_sse4);
> + LUMA_FILTERS(_sse4);
>
> p.dct[DCT_8x8] = x265_dct8_sse4;
> p.quant = x265_quant_sse4;
> diff -r c9236d867a07 -r 41a3689f2a07 source/test/ipfilterharness.cpp
> --- a/source/test/ipfilterharness.cpp Tue Feb 25 17:04:43 2014 +0530
> +++ b/source/test/ipfilterharness.cpp Tue Feb 25 17:06:42 2014 +0530
> @@ -342,12 +342,12 @@
> rand_srcStride = rand() % 100; // Randomly generated srcStride
> rand_dstStride = rand() % 100 + 64; // Randomly generated dstStride
>
> - opt(pixel_test_buff[index] + 3 * rand_srcStride,
> + opt(pixel_test_buff[index] + 3 * rand_srcStride + 6,
> rand_srcStride,
> IPF_vec_output_p,
> rand_dstStride,
> coeffIdx);
> - ref(pixel_test_buff[index] + 3 * rand_srcStride,
> + ref(pixel_test_buff[index] + 3 * rand_srcStride + 6,
> rand_srcStride,
> IPF_C_output_p,
> rand_dstStride,
> @@ -408,13 +408,13 @@
> rand_srcStride = rand() % 100; // Randomly generated srcStride
> rand_dstStride = rand() % 100 + 64; // Randomly generated dstStride
>
> - ref(pixel_test_buff[index] + 3 * rand_srcStride,
> + ref(pixel_test_buff[index] + 3 * rand_srcStride + 6,
> rand_srcStride,
> IPF_C_output_s,
> rand_dstStride,
> coeffIdx,
> isRowExt);
> - opt(pixel_test_buff[index] + 3 * rand_srcStride,
> + opt(pixel_test_buff[index] + 3 * rand_srcStride + 6,
> rand_srcStride,
> IPF_vec_output_s,
> rand_dstStride,
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list