[x265] [PATCH] re-enable asm code for pixel_avg; the problem was a missing EMMS
Min Chen
chenm003 at 163.com
Mon Nov 11 09:31:39 CET 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1384158060 -28800
# Node ID e1f09cac66aae9eb707276435a90331bdaf135d3
# Parent 9d74638c3640679d09264b793afdf3ffc58a9107
re-enable asm code for pixel_avg; the problem was a missing EMMS
diff -r 9d74638c3640 -r e1f09cac66aa source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sat Nov 09 20:14:24 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp Mon Nov 11 16:21:00 2013 +0800
@@ -119,13 +119,18 @@
p.sa8d_inter[LUMA_64x16] = cmp<64, 16, 16, 16, x265_pixel_sa8d_16x16_ ## cpu>; \
p.sa8d_inter[LUMA_16x64] = cmp < 16, 64, 16, 16, x265_pixel_sa8d_16x16_ ## cpu >
-#define PIXEL_AVE(cpu) \
+#define PIXEL_AVG(cpu) \
p.pixelavg_pp[LUMA_16x16] = x265_pixel_avg_16x16_ ## cpu; \
p.pixelavg_pp[LUMA_16x8] = x265_pixel_avg_16x8_ ## cpu; \
p.pixelavg_pp[LUMA_8x16] = x265_pixel_avg_8x16_ ## cpu; \
p.pixelavg_pp[LUMA_8x8] = x265_pixel_avg_8x8_ ## cpu; \
p.pixelavg_pp[LUMA_8x4] = x265_pixel_avg_8x4_ ## cpu;
+#define PIXEL_AVG_W4(cpu) \
+ p.pixelavg_pp[LUMA_4x4] = x265_pixel_avg_4x4_ ## cpu; \
+ p.pixelavg_pp[LUMA_4x8] = x265_pixel_avg_4x8_ ## cpu; \
+ p.pixelavg_pp[LUMA_4x16] = x265_pixel_avg_4x16_ ## cpu;
+
#define SETUP_CHROMA_FUNC_DEF(W, H, cpu) \
p.chroma_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu;
@@ -293,10 +298,8 @@
p.sa8d[BLOCK_4x4] = x265_pixel_satd_4x4_mmx2;
p.frame_init_lowres_core = x265_frame_init_lowres_core_mmx2;
- //p.pixelavg_pp[LUMA_4x16] = x265_pixel_avg_4x16_mmx2;
- //p.pixelavg_pp[LUMA_4x8] = x265_pixel_avg_4x8_mmx2;
- //p.pixelavg_pp[LUMA_4x4] = x265_pixel_avg_4x4_mmx2;
- //PIXEL_AVE(sse2);
+ PIXEL_AVG(sse2);
+ PIXEL_AVG_W4(mmx2);
p.sad[LUMA_8x32] = x265_pixel_sad_8x32_sse2;
p.sad[LUMA_16x4] = x265_pixel_sad_16x4_sse2;
@@ -391,7 +394,8 @@
SA8D_INTER_FROM_BLOCK(ssse3);
p.sse_pp[LUMA_4x4] = x265_pixel_ssd_4x4_ssse3;
ASSGN_SSE(ssse3);
- //PIXEL_AVE(ssse3);
+ PIXEL_AVG(ssse3);
+ PIXEL_AVG_W4(ssse3);
p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3;
diff -r 9d74638c3640 -r e1f09cac66aa source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm Sat Nov 09 20:14:24 2013 -0600
+++ b/source/common/x86/mc-a.asm Mon Nov 11 16:21:00 2013 +0800
@@ -8,7 +8,7 @@
;* Laurent Aimar <fenrir at via.ecp.fr>
;* Dylan Yudaken <dyudaken at gmail.com>
;* Holger Lubitz <holger at lubitz.org>
-;* Min Chen <chenm001.163.com>
+;* Min Chen <chenm001 at 163.com>
;* Oskar Arvidsson <oskar at irock.se>
;*
;* This program is free software; you can redistribute it and/or modify
@@ -89,6 +89,9 @@
lea t0, [t0+t1*2*SIZEOF_PIXEL]
sub eax, 2
jg .height_loop
+ %ifidn movu,movq ; detect MMX
+ EMMS
+ %endif
RET
%endmacro
More information about the x265-devel mailing list