[x264-devel] commit: fix an overflow in dct4x4dc_mmx (Loren Merritt)
git version control
git at videolan.org
Thu Nov 27 08:26:33 CET 2008
x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Thu Nov 27 02:37:46 2008 +0000| [2338e1301aded556be8b85c6c3b4050e562ed862] | committer: Loren Merritt
fix an overflow in dct4x4dc_mmx
(unlikely to have occurred in any real video)
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=2338e1301aded556be8b85c6c3b4050e562ed862
---
common/x86/dct-a.asm | 36 ++++++++++++++++----------
tools/checkasm.c | 68 ++++++++++++++++++++++---------------------------
2 files changed, 53 insertions(+), 51 deletions(-)
diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
index e95006d..73922fc 100644
--- a/common/x86/dct-a.asm
+++ b/common/x86/dct-a.asm
@@ -26,8 +26,8 @@
%include "x86util.asm"
SECTION_RODATA
-pw_1: times 8 dw 1
pw_32: times 8 dw 32
+pw_8000: times 8 dw 0x8000
pb_sub4frame: db 0,1,4,8,5,2,3,6,9,12,13,10,7,11,14,15
pb_scan4framea: db 12,13,6,7,14,15,0,1,8,9,2,3,4,5,10,11
pb_scan4frameb: db 0,1,8,9,2,3,4,5,10,11,12,13,6,7,14,15
@@ -40,6 +40,18 @@ SECTION .text
SWAP %1, %4, %3
%endmacro
+%macro SUMSUB_17BIT 4 ; a, b, tmp, 0x8000
+ movq m%3, m%4
+ paddw m%1, m%4
+ psubw m%3, m%2
+ paddw m%2, m%4
+ pavgw m%3, m%1
+ pavgw m%2, m%1
+ psubw m%3, m%4
+ psubw m%2, m%4
+ SWAP %1, %2, %3
+%endmacro
+
;-----------------------------------------------------------------------------
; void x264_dct4x4dc_mmx( int16_t d[4][4] )
;-----------------------------------------------------------------------------
@@ -48,22 +60,18 @@ cglobal x264_dct4x4dc_mmx, 1,1
movq m1, [r0+ 8]
movq m2, [r0+16]
movq m3, [r0+24]
+ movq m7, [pw_8000 GLOBAL] ; convert to unsigned and back, so that pavgw works
HADAMARD4_1D 0,1,2,3
TRANSPOSE4x4W 0,1,2,3,4
- HADAMARD4_1D 0,1,2,3
- movq m6, [pw_1 GLOBAL]
- paddw m0, m6
- paddw m1, m6
- paddw m2, m6
- paddw m3, m6
- psraw m0, 1
- psraw m1, 1
- psraw m2, 1
- psraw m3, 1
+ SUMSUB_BADC m1, m0, m3, m2
+ SWAP 0,1
+ SWAP 2,3
+ SUMSUB_17BIT 0,2,4,7
+ SUMSUB_17BIT 1,3,5,7
movq [r0+0], m0
- movq [r0+8], m1
- movq [r0+16], m2
- movq [r0+24], m3
+ movq [r0+8], m2
+ movq [r0+16], m3
+ movq [r0+24], m1
RET
;-----------------------------------------------------------------------------
diff --git a/tools/checkasm.c b/tools/checkasm.c
index a66b4e0..12110a5 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -466,7 +466,7 @@ static int check_dct( int cpu_ref, int cpu_new )
x264_dct_function_t dct_ref;
x264_dct_function_t dct_asm;
x264_quant_function_t qf;
- int ret = 0, ok, used_asm, i, interlace;
+ int ret = 0, ok, used_asm, i, j, interlace;
DECLARE_ALIGNED_16( int16_t dct1[16][4][4] );
DECLARE_ALIGNED_16( int16_t dct2[16][4][4] );
DECLARE_ALIGNED_16( int16_t dct4[16][4][4] );
@@ -560,40 +560,33 @@ static int check_dct( int cpu_ref, int cpu_new )
report( "add_idct8 :" );
#undef TEST_IDCT
- ok = 1; used_asm = 0;
- if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )
- {
- DECLARE_ALIGNED_16( int16_t dct1[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
- DECLARE_ALIGNED_16( int16_t dct2[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
- set_func_name( "dct4x4dc" );
- used_asm = 1;
- call_c1( dct_c.dct4x4dc, dct1 );
- call_a1( dct_asm.dct4x4dc, dct2 );
- if( memcmp( dct1, dct2, 32 ) )
- {
- ok = 0;
- fprintf( stderr, " - dct4x4dc : [FAILED]\n" );
- }
- call_c2( dct_c.dct4x4dc, dct1 );
- call_a2( dct_asm.dct4x4dc, dct2 );
- }
- if( dct_asm.idct4x4dc != dct_ref.idct4x4dc )
- {
- DECLARE_ALIGNED_16( int16_t dct1[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
- DECLARE_ALIGNED_16( int16_t dct2[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
- set_func_name( "idct4x4dc" );
- used_asm = 1;
- call_c1( dct_c.idct4x4dc, dct1 );
- call_a1( dct_asm.idct4x4dc, dct2 );
- if( memcmp( dct1, dct2, 32 ) )
- {
- ok = 0;
- fprintf( stderr, " - idct4x4dc : [FAILED]\n" );
- }
- call_c2( dct_c.idct4x4dc, dct1 );
- call_a2( dct_asm.idct4x4dc, dct2 );
- }
- report( "(i)dct4x4dc :" );
+#define TEST_DCTDC( name )\
+ ok = 1; used_asm = 0;\
+ if( dct_asm.name != dct_ref.name )\
+ {\
+ set_func_name( #name );\
+ used_asm = 1;\
+ uint16_t *p = (uint16_t*)buf1;\
+ for( i=0; i<16 && ok; i++ )\
+ {\
+ for( j=0; j<16; j++ )\
+ dct1[0][0][j] = !i ? (j^j>>1^j>>2^j>>3)&1 ? 4080 : -4080 /* max dc */\
+ : i<8 ? (*p++)&1 ? 4080 : -4080 /* max elements */\
+ : ((*p++)&0x1fff)-0x1000; /* general case */\
+ memcpy( dct2, dct1, 32 );\
+ call_c1( dct_c.name, dct1[0] );\
+ call_a1( dct_asm.name, dct2[0] );\
+ if( memcmp( dct1, dct2, 32 ) )\
+ ok = 0;\
+ }\
+ call_c2( dct_c.name, dct1[0] );\
+ call_a2( dct_asm.name, dct2[0] );\
+ }\
+ report( #name " :" );
+
+ TEST_DCTDC( dct4x4dc );
+ TEST_DCTDC( idct4x4dc );
+#undef TEST_DCTDC
x264_zigzag_function_t zigzag_c;
x264_zigzag_function_t zigzag_ref;
@@ -1086,14 +1079,14 @@ static int check_quant( int cpu_ref, int cpu_new )
ok = oks[1]; used_asm = used_asms[1];
report( "dequant :" );
-
+ ok = 1;
if( qf_a.denoise_dct != qf_ref.denoise_dct )
{
int size;
+ used_asm = 1;
for( size = 16; size <= 64; size += 48 )
{
set_func_name( "denoise_dct" );
- used_asm = 1;
memcpy(dct1, buf1, size*2);
memcpy(dct2, buf1, size*2);
memcpy(buf3+256, buf3, 256);
@@ -1133,6 +1126,7 @@ static int check_quant( int cpu_ref, int cpu_new )
} \
}
+ ok = 1;
TEST_DECIMATE( quant_8x8, decimate_score64, CQM_8IY, 8, 0, 6 );
TEST_DECIMATE( quant_4x4, decimate_score16, CQM_4IY, 4, 0, 6 );
TEST_DECIMATE( quant_4x4, decimate_score15, CQM_4IY, 4, 1, 7 );
More information about the x264-devel mailing list