[x265] [PATCH] re-enable asm code for pixel_avg; the problem was a missing EMMS

Min Chen chenm003 at 163.com
Mon Nov 11 09:31:39 CET 2013


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1384158060 -28800
# Node ID e1f09cac66aae9eb707276435a90331bdaf135d3
# Parent  9d74638c3640679d09264b793afdf3ffc58a9107
re-enable asm code for pixel_avg, the problem is miss EMMS

diff -r 9d74638c3640 -r e1f09cac66aa source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Sat Nov 09 20:14:24 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp	Mon Nov 11 16:21:00 2013 +0800
@@ -119,13 +119,18 @@
     p.sa8d_inter[LUMA_64x16] = cmp<64, 16, 16, 16, x265_pixel_sa8d_16x16_ ## cpu>; \
     p.sa8d_inter[LUMA_16x64] = cmp < 16, 64, 16, 16, x265_pixel_sa8d_16x16_ ## cpu >
 
-#define PIXEL_AVE(cpu) \
+#define PIXEL_AVG(cpu) \
     p.pixelavg_pp[LUMA_16x16] = x265_pixel_avg_16x16_ ## cpu; \
     p.pixelavg_pp[LUMA_16x8]  = x265_pixel_avg_16x8_ ## cpu; \
     p.pixelavg_pp[LUMA_8x16]  = x265_pixel_avg_8x16_ ## cpu; \
     p.pixelavg_pp[LUMA_8x8]   = x265_pixel_avg_8x8_ ## cpu; \
     p.pixelavg_pp[LUMA_8x4]   = x265_pixel_avg_8x4_ ## cpu;
 
+#define PIXEL_AVG_W4(cpu) \
+    p.pixelavg_pp[LUMA_4x4]  = x265_pixel_avg_4x4_ ## cpu; \
+    p.pixelavg_pp[LUMA_4x8]  = x265_pixel_avg_4x8_ ## cpu; \
+    p.pixelavg_pp[LUMA_4x16] = x265_pixel_avg_4x16_ ## cpu;
+
 #define SETUP_CHROMA_FUNC_DEF(W, H, cpu) \
     p.chroma_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
     p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu;
@@ -293,10 +298,8 @@
         p.sa8d[BLOCK_4x4] = x265_pixel_satd_4x4_mmx2;
         p.frame_init_lowres_core = x265_frame_init_lowres_core_mmx2;
 
-        //p.pixelavg_pp[LUMA_4x16] = x265_pixel_avg_4x16_mmx2;
-        //p.pixelavg_pp[LUMA_4x8]  = x265_pixel_avg_4x8_mmx2;
-        //p.pixelavg_pp[LUMA_4x4]  = x265_pixel_avg_4x4_mmx2;
-        //PIXEL_AVE(sse2);
+        PIXEL_AVG(sse2);
+        PIXEL_AVG_W4(mmx2);
 
         p.sad[LUMA_8x32]   = x265_pixel_sad_8x32_sse2;
         p.sad[LUMA_16x4]  = x265_pixel_sad_16x4_sse2;
@@ -391,7 +394,8 @@
         SA8D_INTER_FROM_BLOCK(ssse3);
         p.sse_pp[LUMA_4x4] = x265_pixel_ssd_4x4_ssse3;
         ASSGN_SSE(ssse3);
-        //PIXEL_AVE(ssse3);
+        PIXEL_AVG(ssse3);
+        PIXEL_AVG_W4(ssse3);
 
         p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
         p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3;
diff -r 9d74638c3640 -r e1f09cac66aa source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm	Sat Nov 09 20:14:24 2013 -0600
+++ b/source/common/x86/mc-a.asm	Mon Nov 11 16:21:00 2013 +0800
@@ -8,7 +8,7 @@
 ;*          Laurent Aimar <fenrir at via.ecp.fr>
 ;*          Dylan Yudaken <dyudaken at gmail.com>
 ;*          Holger Lubitz <holger at lubitz.org>
-;*          Min Chen <chenm001.163.com>
+;*          Min Chen <chenm001 at 163.com>
 ;*          Oskar Arvidsson <oskar at irock.se>
 ;*
 ;* This program is free software; you can redistribute it and/or modify
@@ -89,6 +89,9 @@
     lea  t0, [t0+t1*2*SIZEOF_PIXEL]
     sub eax, 2
     jg .height_loop
+ %ifidn movu,movq ; detect MMX
+    EMMS
+ %endif
     RET
 %endmacro
 



More information about the x265-devel mailing list