[x265] [PATCH] asm: removed unused code in pixel_var module
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Tue Nov 26 08:15:42 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1385450061 -19800
# Tue Nov 26 12:44:21 2013 +0530
# Node ID e866b2f9fcd2d4004e968243f18be1fa2a6c87a9
# Parent 9e9767a887e3a91c0953b9bfa17c2f34f03ecf11
asm: removed unused code in pixel_var module
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/pixel.cpp
--- a/source/common/pixel.cpp Mon Nov 25 19:28:33 2013 +0530
+++ b/source/common/pixel.cpp Tue Nov 26 12:44:21 2013 +0530
@@ -968,17 +968,8 @@
p.ssim_4x4x2_core = ssim_4x4x2_core;
p.ssim_end_4 = ssim_end_4;
- p.var[LUMA_8x4] = pixel_var<8, 4>;
p.var[LUMA_8x8] = pixel_var<8, 8>;
- p.var[LUMA_8x16] = pixel_var<8, 16>;
- p.var[LUMA_8x32] = pixel_var<8, 32>;
- p.var[LUMA_16x4] = pixel_var<16, 4>;
- p.var[LUMA_16x8] = pixel_var<16, 8>;
- p.var[LUMA_16x12] = pixel_var<16, 12>;
p.var[LUMA_16x16] = pixel_var<16, 16>;
- p.var[LUMA_16x32] = pixel_var<16, 32>;
- p.var[LUMA_16x64] = pixel_var<16, 64>;
-
p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
}
}
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/primitives.h
--- a/source/common/primitives.h Mon Nov 25 19:28:33 2013 +0530
+++ b/source/common/primitives.h Tue Nov 26 12:44:21 2013 +0530
@@ -268,7 +268,7 @@
calcrecon_t calcrecon[NUM_SQUARE_BLOCKS];
transpose_t transpose[NUM_SQUARE_BLOCKS];
- var_t var[NUM_LUMA_PARTITIONS];
+ var_t var[NUM_SQUARE_BLOCKS];
ssim_4x4x2_core_t ssim_4x4x2_core;
ssim_end4_t ssim_end_4;
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Nov 25 19:28:33 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Nov 26 12:44:21 2013 +0530
@@ -416,16 +416,8 @@
p.var[LUMA_ ## W ## x ## H] = x265_pixel_var_ ## W ## x ## H ## cpu;
#define LUMA_VAR(cpu) \
- SETUP_PIXEL_VAR_DEF(8, 4, cpu); \
SETUP_PIXEL_VAR_DEF(8, 8, cpu); \
- SETUP_PIXEL_VAR_DEF(8, 16, cpu); \
- SETUP_PIXEL_VAR_DEF(8, 32, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 4, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 8, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 12, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 16, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 32, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 64, cpu);
+ SETUP_PIXEL_VAR_DEF(16, 16, cpu);
namespace x265 {
// private x265 namespace
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Mon Nov 25 19:28:33 2013 +0530
+++ b/source/common/x86/pixel-a.asm Tue Nov 26 12:44:21 2013 +0530
@@ -1254,12 +1254,6 @@
VAR_2ROW 8*SIZEOF_PIXEL, 16
VAR_END 16, 16
-cglobal pixel_var_8x16, 2,3
- FIX_STRIDES r1
- VAR_START 0
- VAR_2ROW r1, 8
- VAR_END 8, 16
-
cglobal pixel_var_8x8, 2,3
FIX_STRIDES r1
VAR_START 0
@@ -1301,18 +1295,6 @@
%if HIGH_BIT_DEPTH == 0
%macro VAR 0
-cglobal pixel_var_8x4, 2,3,8
- VAR_START 1
- lea r2, [r1 * 3]
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- VAR_END 8, 4
-
cglobal pixel_var_8x8, 2,3,8
VAR_START 1
lea r2, [r1 * 3]
@@ -1331,142 +1313,6 @@
VAR_CORE
VAR_END 8, 8
-
-cglobal pixel_var_8x16, 2,4,8
- VAR_START 1
- lea r2, [r1 * 3]
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- VAR_END 8, 16
-
-cglobal pixel_var_8x32, 2,4,8
- VAR_START 1
- mov r2d, 2
- lea r3, [r1 * 3]
-.loop:
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- dec r2d
- jnz .loop
- VAR_END 8, 32
-
-cglobal pixel_var_16x4, 2,3,8
- VAR_START 1
- lea r2, [r1 * 3]
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- VAR_END 16, 4
-
-cglobal pixel_var_16x8, 2,3,8
- VAR_START 1
- lea r2, [r1 * 3]
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- VAR_END 16, 8
-
-cglobal pixel_var_16x12, 2,3,8
- VAR_START 1
- lea r2, [r1 * 3]
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- VAR_END 16, 12
-
cglobal pixel_var_16x16, 2,3,8
VAR_START 1
lea r2, [r1 * 3]
@@ -1506,96 +1352,6 @@
DEINTB 1, 0, 4, 3, 7
VAR_CORE
VAR_END 16, 16
-
-cglobal pixel_var_16x32, 2,4,8
- VAR_START 1
- mov r2d, 2
- lea r3, [r1 * 3]
-.loop:
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- dec r2d
- jg .loop
- VAR_END 16, 32
-
-cglobal pixel_var_16x64, 2,4,8
- VAR_START 1
- mov r2d, 4
- lea r3, [r1 * 3]
-.loop:
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- dec r2d
- jg .loop
- VAR_END 16, 64
%endmacro ; VAR
INIT_XMM sse2
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Mon Nov 25 19:28:33 2013 +0530
+++ b/source/common/x86/pixel.h Tue Nov 26 12:44:21 2013 +0530
@@ -351,16 +351,8 @@
uint64_t x265_pixel_var_ ## W ## x ## H ## cpu(pixel *pix, intptr_t pixstride);
#define LUMA_PIXELVAR_DEF(cpu) \
- SETUP_LUMA_PIXELVAR_FUNC(8, 4, cpu); \
SETUP_LUMA_PIXELVAR_FUNC(8, 8, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(8, 16, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(8, 32, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 4, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 8, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 12, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 32, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 48, cpu);
+ SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu);
LUMA_PIXELVAR_DEF(_sse2);
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Mon Nov 25 19:28:33 2013 +0530
+++ b/source/test/pixelharness.cpp Tue Nov 26 12:44:21 2013 +0530
@@ -777,15 +777,6 @@
}
}
- if (opt.var[part])
- {
- if (!check_pixel_var(ref.var[part], opt.var[part]))
- {
- printf("var[%s]: failed!\n", lumaPartStr[part]);
- return false;
- }
- }
-
for(int i = 0; i < X265_CSP_COUNT; i++)
{
if (opt.chroma[i].copy_pp[part])
@@ -905,6 +896,15 @@
return false;
}
}
+
+ if (opt.var[i])
+ {
+ if (!check_pixel_var(ref.var[i], opt.var[i]))
+ {
+ printf("var[%dx%d] failed\n", 4 << i, 4 << i);
+ return false;
+ }
+ }
}
if (opt.cvt32to16_shr)
@@ -1080,12 +1080,6 @@
REPORT_SPEEDUP(opt.luma_add_ps[part], ref.luma_add_ps[part], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE);
}
- if (opt.var[part])
- {
- HEADER("var[%s]", lumaPartStr[part]);
- REPORT_SPEEDUP(opt.var[part], ref.var[part], pbuf1, STRIDE);
- }
-
for (int i = 0; i < X265_CSP_COUNT; i++)
{
if (opt.chroma[i].copy_pp[part])
@@ -1179,6 +1173,12 @@
HEADER("transpose[%dx%d]", 4 << i, 4 << i);
REPORT_SPEEDUP(opt.transpose[i], ref.transpose[i], pbuf1, pbuf2, STRIDE);
}
+
+ if (opt.var[i])
+ {
+ HEADER("var[%dx%d]", 4 << i, 4 << i);
+ REPORT_SPEEDUP(opt.var[i], ref.var[i], pbuf1, STRIDE);
+ }
}
if (opt.cvt32to16_shr)
More information about the x265-devel mailing list