[x264-devel] Fix inappropriate instruction use
Anton Mitrofanov
git at videolan.org
Thu Nov 13 13:52:03 CET 2014
x264 | branch: master | Anton Mitrofanov <BugMaster at narod.ru> | Thu Aug 28 20:13:13 2014 +0400| [9df377f87702c82a2202d34919c07e32c60b40ae] | committer: Fiona Glaser
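Fix inappropriate instruction use
[Summary of the diff below: dct4x4dc, quant_4x4 and quant_8x8 are relabeled from mmx to mmx2 (asm INIT_MMX, prototypes, and function-pointer setup), with the dct4x4dc assignment moved under the X264_CPU_MMX2 check and the quant_4x4/quant_8x8 assignments moved next to the other mmx2 quant functions; in pixel-a.asm the hmul_4p-based pixel_satd_4x4 variant in SATDS_SSE2 is now assembled only when cpuflag(ssse3) is set.]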
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=9df377f87702c82a2202d34919c07e32c60b40ae
---
common/dct.c | 2 +-
common/quant.c | 4 ++--
common/x86/dct-a.asm | 2 +-
common/x86/dct.h | 2 +-
common/x86/pixel-a.asm | 2 +-
common/x86/quant-a.asm | 2 +-
common/x86/quant.h | 4 ++--
7 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/common/dct.c b/common/dct.c
index f5900ef..08f4e89 100644
--- a/common/dct.c
+++ b/common/dct.c
@@ -611,7 +611,6 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
{
dctf->sub4x4_dct = x264_sub4x4_dct_mmx;
dctf->add4x4_idct = x264_add4x4_idct_mmx;
- dctf->dct4x4dc = x264_dct4x4dc_mmx;
dctf->idct4x4dc = x264_idct4x4dc_mmx;
dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_mmx2;
@@ -630,6 +629,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
if( cpu&X264_CPU_MMX2 )
{
+ dctf->dct4x4dc = x264_dct4x4dc_mmx2;
dctf->add8x8_idct_dc = x264_add8x8_idct_dc_mmx2;
dctf->add16x16_idct_dc = x264_add16x16_idct_dc_mmx2;
}
diff --git a/common/quant.c b/common/quant.c
index d7b6911..31d8901 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -558,8 +558,6 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
if( cpu&X264_CPU_MMX )
{
#if ARCH_X86
- pf->quant_4x4 = x264_quant_4x4_mmx;
- pf->quant_8x8 = x264_quant_8x8_mmx;
pf->dequant_4x4 = x264_dequant_4x4_mmx;
pf->dequant_4x4_dc = x264_dequant_4x4dc_mmx2;
pf->dequant_8x8 = x264_dequant_8x8_mmx;
@@ -576,6 +574,8 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
{
pf->quant_2x2_dc = x264_quant_2x2_dc_mmx2;
#if ARCH_X86
+ pf->quant_4x4 = x264_quant_4x4_mmx2;
+ pf->quant_8x8 = x264_quant_8x8_mmx2;
pf->quant_4x4_dc = x264_quant_4x4_dc_mmx2;
pf->decimate_score15 = x264_decimate_score15_mmx2;
pf->decimate_score16 = x264_decimate_score16_mmx2;
diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
index 4376e36..bc82ff6 100644
--- a/common/x86/dct-a.asm
+++ b/common/x86/dct-a.asm
@@ -143,7 +143,7 @@ INIT_XMM avx
DCT4x4_DC
%else
-INIT_MMX mmx
+INIT_MMX mmx2
cglobal dct4x4dc, 1,1
movq m3, [r0+24]
movq m2, [r0+16]
diff --git a/common/x86/dct.h b/common/x86/dct.h
index 337a632..f22a979 100644
--- a/common/x86/dct.h
+++ b/common/x86/dct.h
@@ -70,7 +70,7 @@ void x264_add8x8_idct_dc_avx ( pixel *p_dst, dctcoef dct [ 4] );
void x264_add16x16_idct_dc_avx ( pixel *p_dst, dctcoef dct [16] );
void x264_add16x16_idct_dc_avx2 ( uint8_t *p_dst, int16_t dct [16] );
-void x264_dct4x4dc_mmx ( int16_t d[16] );
+void x264_dct4x4dc_mmx2 ( int16_t d[16] );
void x264_dct4x4dc_sse2 ( int32_t d[16] );
void x264_dct4x4dc_avx ( int32_t d[16] );
void x264_idct4x4dc_mmx ( int16_t d[16] );
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index 262c537..f5f6a82 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -1600,7 +1600,7 @@ cglobal pixel_satd_4x4, 4,6
%macro SATDS_SSE2 0
%define vertical ((notcpuflag(ssse3) || cpuflag(atom)) || HIGH_BIT_DEPTH)
-%if vertical==0 || HIGH_BIT_DEPTH
+%if cpuflag(ssse3) && (vertical==0 || HIGH_BIT_DEPTH)
cglobal pixel_satd_4x4, 4, 6, 6
SATD_START_MMX
mova m4, [hmul_4p]
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index fb588d3..731f7d1 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -453,7 +453,7 @@ INIT_MMX mmx2
QUANT_DC quant_2x2_dc, 1
%if ARCH_X86_64 == 0 ; not needed because sse2 is faster
QUANT_DC quant_4x4_dc, 4
-INIT_MMX mmx
+INIT_MMX mmx2
QUANT_AC quant_4x4, 4
QUANT_AC quant_8x8, 16
%endif
diff --git a/common/x86/quant.h b/common/x86/quant.h
index 1fcb800..c6a8a9b 100644
--- a/common/x86/quant.h
+++ b/common/x86/quant.h
@@ -30,8 +30,8 @@
int x264_quant_2x2_dc_mmx2( dctcoef dct[4], int mf, int bias );
int x264_quant_4x4_dc_mmx2( dctcoef dct[16], int mf, int bias );
-int x264_quant_4x4_mmx( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
-int x264_quant_8x8_mmx( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
+int x264_quant_4x4_mmx2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
+int x264_quant_8x8_mmx2( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
int x264_quant_2x2_dc_sse2( dctcoef dct[16], int mf, int bias );
int x264_quant_4x4_dc_sse2( dctcoef dct[16], int mf, int bias );
int x264_quant_4x4_sse2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
More information about the x264-devel mailing list