[x265] [PATCH] asm: removed unused code in pixel_var module
Murugan Vairavel
murugan at multicorewareinc.com
Tue Nov 26 09:01:43 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1385452882 -19800
# Tue Nov 26 13:31:22 2013 +0530
# Node ID e17784926dd2a5a3145557655c979fd564308fb2
# Parent 9e9767a887e3a91c0953b9bfa17c2f34f03ecf11
asm: removed unused code in pixel_var module
diff -r 9e9767a887e3 -r e17784926dd2 source/common/pixel.cpp
--- a/source/common/pixel.cpp Mon Nov 25 19:28:33 2013 +0530
+++ b/source/common/pixel.cpp Tue Nov 26 13:31:22 2013 +0530
@@ -673,14 +673,14 @@
return ssim;
}
-template<int w, int h>
+template<int size>
uint64_t pixel_var(pixel *pix, intptr_t i_stride)
{
uint32_t sum = 0, sqr = 0;
- for (int y = 0; y < h; y++)
+ for (int y = 0; y < size; y++)
{
- for (int x = 0; x < w; x++)
+ for (int x = 0; x < size; x++)
{
sum += pix[x];
sqr += pix[x] * pix[x];
@@ -968,17 +968,8 @@
p.ssim_4x4x2_core = ssim_4x4x2_core;
p.ssim_end_4 = ssim_end_4;
- p.var[LUMA_8x4] = pixel_var<8, 4>;
- p.var[LUMA_8x8] = pixel_var<8, 8>;
- p.var[LUMA_8x16] = pixel_var<8, 16>;
- p.var[LUMA_8x32] = pixel_var<8, 32>;
- p.var[LUMA_16x4] = pixel_var<16, 4>;
- p.var[LUMA_16x8] = pixel_var<16, 8>;
- p.var[LUMA_16x12] = pixel_var<16, 12>;
- p.var[LUMA_16x16] = pixel_var<16, 16>;
- p.var[LUMA_16x32] = pixel_var<16, 32>;
- p.var[LUMA_16x64] = pixel_var<16, 64>;
-
+ p.var[BLOCK_8x8] = pixel_var<8>;
+ p.var[BLOCK_16x16] = pixel_var<16>;
p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
}
}
diff -r 9e9767a887e3 -r e17784926dd2 source/common/primitives.h
--- a/source/common/primitives.h Mon Nov 25 19:28:33 2013 +0530
+++ b/source/common/primitives.h Tue Nov 26 13:31:22 2013 +0530
@@ -268,7 +268,7 @@
calcrecon_t calcrecon[NUM_SQUARE_BLOCKS];
transpose_t transpose[NUM_SQUARE_BLOCKS];
- var_t var[NUM_LUMA_PARTITIONS];
+ var_t var[NUM_SQUARE_BLOCKS];
ssim_4x4x2_core_t ssim_4x4x2_core;
ssim_end4_t ssim_end_4;
diff -r 9e9767a887e3 -r e17784926dd2 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Nov 25 19:28:33 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Nov 26 13:31:22 2013 +0530
@@ -413,19 +413,11 @@
SETUP_LUMA_BLOCKCOPY_FUNC_DEF(16, 64, cpu);
#define SETUP_PIXEL_VAR_DEF(W, H, cpu) \
- p.var[LUMA_ ## W ## x ## H] = x265_pixel_var_ ## W ## x ## H ## cpu;
+ p.var[BLOCK_ ## W ## x ## H] = x265_pixel_var_ ## W ## x ## H ## cpu;
#define LUMA_VAR(cpu) \
- SETUP_PIXEL_VAR_DEF(8, 4, cpu); \
SETUP_PIXEL_VAR_DEF(8, 8, cpu); \
- SETUP_PIXEL_VAR_DEF(8, 16, cpu); \
- SETUP_PIXEL_VAR_DEF(8, 32, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 4, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 8, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 12, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 16, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 32, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 64, cpu);
+ SETUP_PIXEL_VAR_DEF(16, 16, cpu);
namespace x265 {
// private x265 namespace
diff -r 9e9767a887e3 -r e17784926dd2 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Mon Nov 25 19:28:33 2013 +0530
+++ b/source/common/x86/pixel-a.asm Tue Nov 26 13:31:22 2013 +0530
@@ -1254,12 +1254,6 @@
VAR_2ROW 8*SIZEOF_PIXEL, 16
VAR_END 16, 16
-cglobal pixel_var_8x16, 2,3
- FIX_STRIDES r1
- VAR_START 0
- VAR_2ROW r1, 8
- VAR_END 8, 16
-
cglobal pixel_var_8x8, 2,3
FIX_STRIDES r1
VAR_START 0
@@ -1301,18 +1295,6 @@
%if HIGH_BIT_DEPTH == 0
%macro VAR 0
-cglobal pixel_var_8x4, 2,3,8
- VAR_START 1
- lea r2, [r1 * 3]
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- VAR_END 8, 4
-
cglobal pixel_var_8x8, 2,3,8
VAR_START 1
lea r2, [r1 * 3]
@@ -1331,142 +1313,6 @@
VAR_CORE
VAR_END 8, 8
-
-cglobal pixel_var_8x16, 2,4,8
- VAR_START 1
- lea r2, [r1 * 3]
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- VAR_END 8, 16
-
-cglobal pixel_var_8x32, 2,4,8
- VAR_START 1
- mov r2d, 2
- lea r3, [r1 * 3]
-.loop:
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- movh m0, [r0]
- movh m3, [r0 + r1]
- movhps m0, [r0 + r1 * 2]
- movhps m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- dec r2d
- jnz .loop
- VAR_END 8, 32
-
-cglobal pixel_var_16x4, 2,3,8
- VAR_START 1
- lea r2, [r1 * 3]
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- VAR_END 16, 4
-
-cglobal pixel_var_16x8, 2,3,8
- VAR_START 1
- lea r2, [r1 * 3]
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- VAR_END 16, 8
-
-cglobal pixel_var_16x12, 2,3,8
- VAR_START 1
- lea r2, [r1 * 3]
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r2]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- VAR_END 16, 12
-
cglobal pixel_var_16x16, 2,3,8
VAR_START 1
lea r2, [r1 * 3]
@@ -1506,96 +1352,6 @@
DEINTB 1, 0, 4, 3, 7
VAR_CORE
VAR_END 16, 16
-
-cglobal pixel_var_16x32, 2,4,8
- VAR_START 1
- mov r2d, 2
- lea r3, [r1 * 3]
-.loop:
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- dec r2d
- jg .loop
- VAR_END 16, 32
-
-cglobal pixel_var_16x64, 2,4,8
- VAR_START 1
- mov r2d, 4
- lea r3, [r1 * 3]
-.loop:
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- mova m0, [r0]
- mova m3, [r0 + r1]
- DEINTB 1, 0, 4, 3, 7
- VAR_CORE
- mova m0, [r0 + 2 * r1]
- mova m3, [r0 + r3]
- DEINTB 1, 0, 4, 3, 7
- lea r0, [r0 + r1 * 4]
- VAR_CORE
- dec r2d
- jg .loop
- VAR_END 16, 64
%endmacro ; VAR
INIT_XMM sse2
diff -r 9e9767a887e3 -r e17784926dd2 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Mon Nov 25 19:28:33 2013 +0530
+++ b/source/common/x86/pixel.h Tue Nov 26 13:31:22 2013 +0530
@@ -351,16 +351,8 @@
uint64_t x265_pixel_var_ ## W ## x ## H ## cpu(pixel *pix, intptr_t pixstride);
#define LUMA_PIXELVAR_DEF(cpu) \
- SETUP_LUMA_PIXELVAR_FUNC(8, 4, cpu); \
SETUP_LUMA_PIXELVAR_FUNC(8, 8, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(8, 16, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(8, 32, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 4, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 8, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 12, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 32, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 48, cpu);
+ SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu);
LUMA_PIXELVAR_DEF(_sse2);
diff -r 9e9767a887e3 -r e17784926dd2 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Mon Nov 25 19:28:33 2013 +0530
+++ b/source/encoder/ratecontrol.cpp Tue Nov 26 13:31:22 2013 +0530
@@ -68,10 +68,10 @@
{
ALIGN_VAR_8(pixel, pix[8 * 8]);
primitives.luma_copy_pp[LUMA_8x8](pix, 8, src, srcStride);
- return acEnergyVar(pic, primitives.var[LUMA_8x8](pix, 8), 6, bChroma);
+ return acEnergyVar(pic, primitives.var[BLOCK_8x8](pix, 8), 6, bChroma);
}
else
- return acEnergyVar(pic, primitives.var[LUMA_16x16](src, srcStride), 8, bChroma);
+ return acEnergyVar(pic, primitives.var[BLOCK_16x16](src, srcStride), 8, bChroma);
}
/* Find the total AC energy of each block in all planes */
diff -r 9e9767a887e3 -r e17784926dd2 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Mon Nov 25 19:28:33 2013 +0530
+++ b/source/test/pixelharness.cpp Tue Nov 26 13:31:22 2013 +0530
@@ -777,15 +777,6 @@
}
}
- if (opt.var[part])
- {
- if (!check_pixel_var(ref.var[part], opt.var[part]))
- {
- printf("var[%s]: failed!\n", lumaPartStr[part]);
- return false;
- }
- }
-
for(int i = 0; i < X265_CSP_COUNT; i++)
{
if (opt.chroma[i].copy_pp[part])
@@ -905,6 +896,15 @@
return false;
}
}
+
+ if (opt.var[i])
+ {
+ if (!check_pixel_var(ref.var[i], opt.var[i]))
+ {
+ printf("var[%dx%d] failed\n", 4 << i, 4 << i);
+ return false;
+ }
+ }
}
if (opt.cvt32to16_shr)
@@ -1080,12 +1080,6 @@
REPORT_SPEEDUP(opt.luma_add_ps[part], ref.luma_add_ps[part], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE);
}
- if (opt.var[part])
- {
- HEADER("var[%s]", lumaPartStr[part]);
- REPORT_SPEEDUP(opt.var[part], ref.var[part], pbuf1, STRIDE);
- }
-
for (int i = 0; i < X265_CSP_COUNT; i++)
{
if (opt.chroma[i].copy_pp[part])
@@ -1179,6 +1173,12 @@
HEADER("transpose[%dx%d]", 4 << i, 4 << i);
REPORT_SPEEDUP(opt.transpose[i], ref.transpose[i], pbuf1, pbuf2, STRIDE);
}
+
+ if (opt.var[i])
+ {
+ HEADER("var[%dx%d]", 4 << i, 4 << i);
+ REPORT_SPEEDUP(opt.var[i], ref.var[i], pbuf1, STRIDE);
+ }
}
if (opt.cvt32to16_shr)
More information about the x265-devel mailing list