<div dir="ltr">Please ignore this patch; the C code needs some modifications.<div><br></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Tue, Nov 26, 2013 at 12:45 PM, <span dir="ltr">&lt;<a href="mailto:murugan@multicorewareinc.com" target="_blank">murugan@multicorewareinc.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Murugan Vairavel &lt;<a href="mailto:murugan@multicorewareinc.com">murugan@multicorewareinc.com</a>&gt;<br>
# Date 1385450061 -19800<br>
# Tue Nov 26 12:44:21 2013 +0530<br>
# Node ID e866b2f9fcd2d4004e968243f18be1fa2a6c87a9<br>
# Parent 9e9767a887e3a91c0953b9bfa17c2f34f03ecf11<br>
asm: removed unused code in pixel_var module<br>
<br>
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/pixel.cpp<br>
--- a/source/common/pixel.cpp Mon Nov 25 19:28:33 2013 +0530<br>
+++ b/source/common/pixel.cpp Tue Nov 26 12:44:21 2013 +0530<br>
@@ -968,17 +968,8 @@<br>
p.ssim_4x4x2_core = ssim_4x4x2_core;<br>
p.ssim_end_4 = ssim_end_4;<br>
<br>
- p.var[LUMA_8x4] = pixel_var<8, 4>;<br>
p.var[LUMA_8x8] = pixel_var<8, 8>;<br>
- p.var[LUMA_8x16] = pixel_var<8, 16>;<br>
- p.var[LUMA_8x32] = pixel_var<8, 32>;<br>
- p.var[LUMA_16x4] = pixel_var<16, 4>;<br>
- p.var[LUMA_16x8] = pixel_var<16, 8>;<br>
- p.var[LUMA_16x12] = pixel_var<16, 12>;<br>
p.var[LUMA_16x16] = pixel_var<16, 16>;<br>
- p.var[LUMA_16x32] = pixel_var<16, 32>;<br>
- p.var[LUMA_16x64] = pixel_var<16, 64>;<br>
-<br>
p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;<br>
}<br>
}<br>
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/primitives.h<br>
--- a/source/common/primitives.h Mon Nov 25 19:28:33 2013 +0530<br>
+++ b/source/common/primitives.h Tue Nov 26 12:44:21 2013 +0530<br>
@@ -268,7 +268,7 @@<br>
calcrecon_t calcrecon[NUM_SQUARE_BLOCKS];<br>
transpose_t transpose[NUM_SQUARE_BLOCKS];<br>
<br>
- var_t var[NUM_LUMA_PARTITIONS];<br>
+ var_t var[NUM_SQUARE_BLOCKS];<br>
ssim_4x4x2_core_t ssim_4x4x2_core;<br>
ssim_end4_t ssim_end_4;<br>
<br>
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp Mon Nov 25 19:28:33 2013 +0530<br>
+++ b/source/common/x86/asm-primitives.cpp Tue Nov 26 12:44:21 2013 +0530<br>
@@ -416,16 +416,8 @@<br>
p.var[LUMA_ ## W ## x ## H] = x265_pixel_var_ ## W ## x ## H ## cpu;<br>
<br>
#define LUMA_VAR(cpu) \<br>
- SETUP_PIXEL_VAR_DEF(8, 4, cpu); \<br>
SETUP_PIXEL_VAR_DEF(8, 8, cpu); \<br>
- SETUP_PIXEL_VAR_DEF(8, 16, cpu); \<br>
- SETUP_PIXEL_VAR_DEF(8, 32, cpu); \<br>
- SETUP_PIXEL_VAR_DEF(16, 4, cpu); \<br>
- SETUP_PIXEL_VAR_DEF(16, 8, cpu); \<br>
- SETUP_PIXEL_VAR_DEF(16, 12, cpu); \<br>
- SETUP_PIXEL_VAR_DEF(16, 16, cpu); \<br>
- SETUP_PIXEL_VAR_DEF(16, 32, cpu); \<br>
- SETUP_PIXEL_VAR_DEF(16, 64, cpu);<br>
+ SETUP_PIXEL_VAR_DEF(16, 16, cpu);<br>
<br>
namespace x265 {<br>
// private x265 namespace<br>
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/pixel-a.asm<br>
--- a/source/common/x86/pixel-a.asm Mon Nov 25 19:28:33 2013 +0530<br>
+++ b/source/common/x86/pixel-a.asm Tue Nov 26 12:44:21 2013 +0530<br>
@@ -1254,12 +1254,6 @@<br>
VAR_2ROW 8*SIZEOF_PIXEL, 16<br>
VAR_END 16, 16<br>
<br>
-cglobal pixel_var_8x16, 2,3<br>
- FIX_STRIDES r1<br>
- VAR_START 0<br>
- VAR_2ROW r1, 8<br>
- VAR_END 8, 16<br>
-<br>
cglobal pixel_var_8x8, 2,3<br>
FIX_STRIDES r1<br>
VAR_START 0<br>
@@ -1301,18 +1295,6 @@<br>
<br>
%if HIGH_BIT_DEPTH == 0<br>
%macro VAR 0<br>
-cglobal pixel_var_8x4, 2,3,8<br>
- VAR_START 1<br>
- lea r2, [r1 * 3]<br>
- movh m0, [r0]<br>
- movh m3, [r0 + r1]<br>
- movhps m0, [r0 + r1 * 2]<br>
- movhps m3, [r0 + r2]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- VAR_END 8, 4<br>
-<br>
cglobal pixel_var_8x8, 2,3,8<br>
VAR_START 1<br>
lea r2, [r1 * 3]<br>
@@ -1331,142 +1313,6 @@<br>
VAR_CORE<br>
VAR_END 8, 8<br>
<br>
-<br>
-cglobal pixel_var_8x16, 2,4,8<br>
- VAR_START 1<br>
- lea r2, [r1 * 3]<br>
- movh m0, [r0]<br>
- movh m3, [r0 + r1]<br>
- movhps m0, [r0 + r1 * 2]<br>
- movhps m3, [r0 + r2]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- movh m0, [r0]<br>
- movh m3, [r0 + r1]<br>
- movhps m0, [r0 + r1 * 2]<br>
- movhps m3, [r0 + r2]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- movh m0, [r0]<br>
- movh m3, [r0 + r1]<br>
- movhps m0, [r0 + r1 * 2]<br>
- movhps m3, [r0 + r2]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- movh m0, [r0]<br>
- movh m3, [r0 + r1]<br>
- movhps m0, [r0 + r1 * 2]<br>
- movhps m3, [r0 + r2]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- VAR_END 8, 16<br>
-<br>
-cglobal pixel_var_8x32, 2,4,8<br>
- VAR_START 1<br>
- mov r2d, 2<br>
- lea r3, [r1 * 3]<br>
-.loop:<br>
- movh m0, [r0]<br>
- movh m3, [r0 + r1]<br>
- movhps m0, [r0 + r1 * 2]<br>
- movhps m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- movh m0, [r0]<br>
- movh m3, [r0 + r1]<br>
- movhps m0, [r0 + r1 * 2]<br>
- movhps m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- movh m0, [r0]<br>
- movh m3, [r0 + r1]<br>
- movhps m0, [r0 + r1 * 2]<br>
- movhps m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- movh m0, [r0]<br>
- movh m3, [r0 + r1]<br>
- movhps m0, [r0 + r1 * 2]<br>
- movhps m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- dec r2d<br>
- jnz .loop<br>
- VAR_END 8, 32<br>
-<br>
-cglobal pixel_var_16x4, 2,3,8<br>
- VAR_START 1<br>
- lea r2, [r1 * 3]<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r2]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- VAR_END 16, 4<br>
-<br>
-cglobal pixel_var_16x8, 2,3,8<br>
- VAR_START 1<br>
- lea r2, [r1 * 3]<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r2]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r2]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- VAR_END 16, 8<br>
-<br>
-cglobal pixel_var_16x12, 2,3,8<br>
- VAR_START 1<br>
- lea r2, [r1 * 3]<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r2]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r2]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r2]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- VAR_END 16, 12<br>
-<br>
cglobal pixel_var_16x16, 2,3,8<br>
VAR_START 1<br>
lea r2, [r1 * 3]<br>
@@ -1506,96 +1352,6 @@<br>
DEINTB 1, 0, 4, 3, 7<br>
VAR_CORE<br>
VAR_END 16, 16<br>
-<br>
-cglobal pixel_var_16x32, 2,4,8<br>
- VAR_START 1<br>
- mov r2d, 2<br>
- lea r3, [r1 * 3]<br>
-.loop:<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- dec r2d<br>
- jg .loop<br>
- VAR_END 16, 32<br>
-<br>
-cglobal pixel_var_16x64, 2,4,8<br>
- VAR_START 1<br>
- mov r2d, 4<br>
- lea r3, [r1 * 3]<br>
-.loop:<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- mova m0, [r0]<br>
- mova m3, [r0 + r1]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- VAR_CORE<br>
- mova m0, [r0 + 2 * r1]<br>
- mova m3, [r0 + r3]<br>
- DEINTB 1, 0, 4, 3, 7<br>
- lea r0, [r0 + r1 * 4]<br>
- VAR_CORE<br>
- dec r2d<br>
- jg .loop<br>
- VAR_END 16, 64<br>
%endmacro ; VAR<br>
<br>
INIT_XMM sse2<br>
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/pixel.h<br>
--- a/source/common/x86/pixel.h Mon Nov 25 19:28:33 2013 +0530<br>
+++ b/source/common/x86/pixel.h Tue Nov 26 12:44:21 2013 +0530<br>
@@ -351,16 +351,8 @@<br>
uint64_t x265_pixel_var_ ## W ## x ## H ## cpu(pixel *pix, intptr_t pixstride);<br>
<br>
#define LUMA_PIXELVAR_DEF(cpu) \<br>
- SETUP_LUMA_PIXELVAR_FUNC(8, 4, cpu); \<br>
SETUP_LUMA_PIXELVAR_FUNC(8, 8, cpu); \<br>
- SETUP_LUMA_PIXELVAR_FUNC(8, 16, cpu); \<br>
- SETUP_LUMA_PIXELVAR_FUNC(8, 32, cpu); \<br>
- SETUP_LUMA_PIXELVAR_FUNC(16, 4, cpu); \<br>
- SETUP_LUMA_PIXELVAR_FUNC(16, 8, cpu); \<br>
- SETUP_LUMA_PIXELVAR_FUNC(16, 12, cpu); \<br>
- SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu); \<br>
- SETUP_LUMA_PIXELVAR_FUNC(16, 32, cpu); \<br>
- SETUP_LUMA_PIXELVAR_FUNC(16, 48, cpu);<br>
+ SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu);<br>
<br>
LUMA_PIXELVAR_DEF(_sse2);<br>
<br>
diff -r 9e9767a887e3 -r e866b2f9fcd2 source/test/pixelharness.cpp<br>
--- a/source/test/pixelharness.cpp Mon Nov 25 19:28:33 2013 +0530<br>
+++ b/source/test/pixelharness.cpp Tue Nov 26 12:44:21 2013 +0530<br>
@@ -777,15 +777,6 @@<br>
}<br>
}<br>
<br>
- if (opt.var[part])<br>
- {<br>
- if (!check_pixel_var(ref.var[part], opt.var[part]))<br>
- {<br>
- printf("var[%s]: failed!\n", lumaPartStr[part]);<br>
- return false;<br>
- }<br>
- }<br>
-<br>
for(int i = 0; i < X265_CSP_COUNT; i++)<br>
{<br>
if (opt.chroma[i].copy_pp[part])<br>
@@ -905,6 +896,15 @@<br>
return false;<br>
}<br>
}<br>
+<br>
+ if (opt.var[i])<br>
+ {<br>
+ if (!check_pixel_var(ref.var[i], opt.var[i]))<br>
+ {<br>
+ printf("var[%dx%d] failed\n", 4 << i, 4 << i);<br>
+ return false;<br>
+ }<br>
+ }<br>
}<br>
<br>
if (opt.cvt32to16_shr)<br>
@@ -1080,12 +1080,6 @@<br>
REPORT_SPEEDUP(opt.luma_add_ps[part], ref.luma_add_ps[part], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE);<br>
}<br>
<br>
- if (opt.var[part])<br>
- {<br>
- HEADER("var[%s]", lumaPartStr[part]);<br>
- REPORT_SPEEDUP(opt.var[part], ref.var[part], pbuf1, STRIDE);<br>
- }<br>
-<br>
for (int i = 0; i < X265_CSP_COUNT; i++)<br>
{<br>
if (opt.chroma[i].copy_pp[part])<br>
@@ -1179,6 +1173,12 @@<br>
HEADER("transpose[%dx%d]", 4 << i, 4 << i);<br>
REPORT_SPEEDUP(opt.transpose[i], ref.transpose[i], pbuf1, pbuf2, STRIDE);<br>
}<br>
+<br>
+ if (opt.var[i])<br>
+ {<br>
+ HEADER("var[%dx%d]", 4 << i, 4 << i);<br>
+ REPORT_SPEEDUP(opt.var[i], ref.var[i], pbuf1, STRIDE);<br>
+ }<br>
}<br>
<br>
if (opt.cvt32to16_shr)<br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br><div dir="ltr">With Regards,<div><br></div><div>Murugan. V</div><div>+919659287478</div></div>
</div>