[x265] [PATCH] asm: removed unused code in pixel_var module
Murugan Vairavel
murugan at multicorewareinc.com
Tue Nov 26 08:49:09 CET 2013
Please ignore this patch; the C code needs some modifications.
On Tue, Nov 26, 2013 at 12:45 PM, <murugan at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Murugan Vairavel <murugan at multicorewareinc.com>
> # Date 1385450061 -19800
> # Tue Nov 26 12:44:21 2013 +0530
> # Node ID e866b2f9fcd2d4004e968243f18be1fa2a6c87a9
> # Parent 9e9767a887e3a91c0953b9bfa17c2f34f03ecf11
> asm: removed unused code in pixel_var module
>
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/pixel.cpp
> --- a/source/common/pixel.cpp Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/common/pixel.cpp Tue Nov 26 12:44:21 2013 +0530
> @@ -968,17 +968,8 @@
> p.ssim_4x4x2_core = ssim_4x4x2_core;
> p.ssim_end_4 = ssim_end_4;
>
> - p.var[LUMA_8x4] = pixel_var<8, 4>;
> p.var[LUMA_8x8] = pixel_var<8, 8>;
> - p.var[LUMA_8x16] = pixel_var<8, 16>;
> - p.var[LUMA_8x32] = pixel_var<8, 32>;
> - p.var[LUMA_16x4] = pixel_var<16, 4>;
> - p.var[LUMA_16x8] = pixel_var<16, 8>;
> - p.var[LUMA_16x12] = pixel_var<16, 12>;
> p.var[LUMA_16x16] = pixel_var<16, 16>;
> - p.var[LUMA_16x32] = pixel_var<16, 32>;
> - p.var[LUMA_16x64] = pixel_var<16, 64>;
> -
> p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
> }
> }
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/primitives.h
> --- a/source/common/primitives.h Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/common/primitives.h Tue Nov 26 12:44:21 2013 +0530
> @@ -268,7 +268,7 @@
> calcrecon_t calcrecon[NUM_SQUARE_BLOCKS];
> transpose_t transpose[NUM_SQUARE_BLOCKS];
>
> - var_t var[NUM_LUMA_PARTITIONS];
> + var_t var[NUM_SQUARE_BLOCKS];
> ssim_4x4x2_core_t ssim_4x4x2_core;
> ssim_end4_t ssim_end_4;
>
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/common/x86/asm-primitives.cpp Tue Nov 26 12:44:21 2013 +0530
> @@ -416,16 +416,8 @@
> p.var[LUMA_ ## W ## x ## H] = x265_pixel_var_ ## W ## x ## H ## cpu;
>
> #define LUMA_VAR(cpu) \
> - SETUP_PIXEL_VAR_DEF(8, 4, cpu); \
> SETUP_PIXEL_VAR_DEF(8, 8, cpu); \
> - SETUP_PIXEL_VAR_DEF(8, 16, cpu); \
> - SETUP_PIXEL_VAR_DEF(8, 32, cpu); \
> - SETUP_PIXEL_VAR_DEF(16, 4, cpu); \
> - SETUP_PIXEL_VAR_DEF(16, 8, cpu); \
> - SETUP_PIXEL_VAR_DEF(16, 12, cpu); \
> - SETUP_PIXEL_VAR_DEF(16, 16, cpu); \
> - SETUP_PIXEL_VAR_DEF(16, 32, cpu); \
> - SETUP_PIXEL_VAR_DEF(16, 64, cpu);
> + SETUP_PIXEL_VAR_DEF(16, 16, cpu);
>
> namespace x265 {
> // private x265 namespace
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/pixel-a.asm
> --- a/source/common/x86/pixel-a.asm Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/common/x86/pixel-a.asm Tue Nov 26 12:44:21 2013 +0530
> @@ -1254,12 +1254,6 @@
> VAR_2ROW 8*SIZEOF_PIXEL, 16
> VAR_END 16, 16
>
> -cglobal pixel_var_8x16, 2,3
> - FIX_STRIDES r1
> - VAR_START 0
> - VAR_2ROW r1, 8
> - VAR_END 8, 16
> -
> cglobal pixel_var_8x8, 2,3
> FIX_STRIDES r1
> VAR_START 0
> @@ -1301,18 +1295,6 @@
>
> %if HIGH_BIT_DEPTH == 0
> %macro VAR 0
> -cglobal pixel_var_8x4, 2,3,8
> - VAR_START 1
> - lea r2, [r1 * 3]
> - movh m0, [r0]
> - movh m3, [r0 + r1]
> - movhps m0, [r0 + r1 * 2]
> - movhps m3, [r0 + r2]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - VAR_END 8, 4
> -
> cglobal pixel_var_8x8, 2,3,8
> VAR_START 1
> lea r2, [r1 * 3]
> @@ -1331,142 +1313,6 @@
> VAR_CORE
> VAR_END 8, 8
>
> -
> -cglobal pixel_var_8x16, 2,4,8
> - VAR_START 1
> - lea r2, [r1 * 3]
> - movh m0, [r0]
> - movh m3, [r0 + r1]
> - movhps m0, [r0 + r1 * 2]
> - movhps m3, [r0 + r2]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - movh m0, [r0]
> - movh m3, [r0 + r1]
> - movhps m0, [r0 + r1 * 2]
> - movhps m3, [r0 + r2]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - movh m0, [r0]
> - movh m3, [r0 + r1]
> - movhps m0, [r0 + r1 * 2]
> - movhps m3, [r0 + r2]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - movh m0, [r0]
> - movh m3, [r0 + r1]
> - movhps m0, [r0 + r1 * 2]
> - movhps m3, [r0 + r2]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - VAR_END 8, 16
> -
> -cglobal pixel_var_8x32, 2,4,8
> - VAR_START 1
> - mov r2d, 2
> - lea r3, [r1 * 3]
> -.loop:
> - movh m0, [r0]
> - movh m3, [r0 + r1]
> - movhps m0, [r0 + r1 * 2]
> - movhps m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - movh m0, [r0]
> - movh m3, [r0 + r1]
> - movhps m0, [r0 + r1 * 2]
> - movhps m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - movh m0, [r0]
> - movh m3, [r0 + r1]
> - movhps m0, [r0 + r1 * 2]
> - movhps m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - movh m0, [r0]
> - movh m3, [r0 + r1]
> - movhps m0, [r0 + r1 * 2]
> - movhps m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - dec r2d
> - jnz .loop
> - VAR_END 8, 32
> -
> -cglobal pixel_var_16x4, 2,3,8
> - VAR_START 1
> - lea r2, [r1 * 3]
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r2]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - VAR_END 16, 4
> -
> -cglobal pixel_var_16x8, 2,3,8
> - VAR_START 1
> - lea r2, [r1 * 3]
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r2]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r2]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - VAR_END 16, 8
> -
> -cglobal pixel_var_16x12, 2,3,8
> - VAR_START 1
> - lea r2, [r1 * 3]
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r2]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r2]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r2]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - VAR_END 16, 12
> -
> cglobal pixel_var_16x16, 2,3,8
> VAR_START 1
> lea r2, [r1 * 3]
> @@ -1506,96 +1352,6 @@
> DEINTB 1, 0, 4, 3, 7
> VAR_CORE
> VAR_END 16, 16
> -
> -cglobal pixel_var_16x32, 2,4,8
> - VAR_START 1
> - mov r2d, 2
> - lea r3, [r1 * 3]
> -.loop:
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - dec r2d
> - jg .loop
> - VAR_END 16, 32
> -
> -cglobal pixel_var_16x64, 2,4,8
> - VAR_START 1
> - mov r2d, 4
> - lea r3, [r1 * 3]
> -.loop:
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - mova m0, [r0]
> - mova m3, [r0 + r1]
> - DEINTB 1, 0, 4, 3, 7
> - VAR_CORE
> - mova m0, [r0 + 2 * r1]
> - mova m3, [r0 + r3]
> - DEINTB 1, 0, 4, 3, 7
> - lea r0, [r0 + r1 * 4]
> - VAR_CORE
> - dec r2d
> - jg .loop
> - VAR_END 16, 64
> %endmacro ; VAR
>
> INIT_XMM sse2
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/pixel.h
> --- a/source/common/x86/pixel.h Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/common/x86/pixel.h Tue Nov 26 12:44:21 2013 +0530
> @@ -351,16 +351,8 @@
> uint64_t x265_pixel_var_ ## W ## x ## H ## cpu(pixel *pix, intptr_t pixstride);
>
> #define LUMA_PIXELVAR_DEF(cpu) \
> - SETUP_LUMA_PIXELVAR_FUNC(8, 4, cpu); \
> SETUP_LUMA_PIXELVAR_FUNC(8, 8, cpu); \
> - SETUP_LUMA_PIXELVAR_FUNC(8, 16, cpu); \
> - SETUP_LUMA_PIXELVAR_FUNC(8, 32, cpu); \
> - SETUP_LUMA_PIXELVAR_FUNC(16, 4, cpu); \
> - SETUP_LUMA_PIXELVAR_FUNC(16, 8, cpu); \
> - SETUP_LUMA_PIXELVAR_FUNC(16, 12, cpu); \
> - SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu); \
> - SETUP_LUMA_PIXELVAR_FUNC(16, 32, cpu); \
> - SETUP_LUMA_PIXELVAR_FUNC(16, 48, cpu);
> + SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu);
>
> LUMA_PIXELVAR_DEF(_sse2);
>
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/test/pixelharness.cpp Tue Nov 26 12:44:21 2013 +0530
> @@ -777,15 +777,6 @@
> }
> }
>
> - if (opt.var[part])
> - {
> - if (!check_pixel_var(ref.var[part], opt.var[part]))
> - {
> - printf("var[%s]: failed!\n", lumaPartStr[part]);
> - return false;
> - }
> - }
> -
> for(int i = 0; i < X265_CSP_COUNT; i++)
> {
> if (opt.chroma[i].copy_pp[part])
> @@ -905,6 +896,15 @@
> return false;
> }
> }
> +
> + if (opt.var[i])
> + {
> + if (!check_pixel_var(ref.var[i], opt.var[i]))
> + {
> + printf("var[%dx%d] failed\n", 4 << i, 4 << i);
> + return false;
> + }
> + }
> }
>
> if (opt.cvt32to16_shr)
> @@ -1080,12 +1080,6 @@
> REPORT_SPEEDUP(opt.luma_add_ps[part], ref.luma_add_ps[part], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE);
> }
>
> - if (opt.var[part])
> - {
> - HEADER("var[%s]", lumaPartStr[part]);
> - REPORT_SPEEDUP(opt.var[part], ref.var[part], pbuf1, STRIDE);
> - }
> -
> for (int i = 0; i < X265_CSP_COUNT; i++)
> {
> if (opt.chroma[i].copy_pp[part])
> @@ -1179,6 +1173,12 @@
> HEADER("transpose[%dx%d]", 4 << i, 4 << i);
> REPORT_SPEEDUP(opt.transpose[i], ref.transpose[i], pbuf1, pbuf2, STRIDE);
> }
> +
> + if (opt.var[i])
> + {
> + HEADER("var[%dx%d]", 4 << i, 4 << i);
> + REPORT_SPEEDUP(opt.var[i], ref.var[i], pbuf1, STRIDE);
> + }
> }
>
> if (opt.cvt32to16_shr)
>
--
With Regards,
Murugan. V
+919659287478
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131126/08fb0384/attachment-0001.html>
More information about the x265-devel mailing list