[x264-devel] commit: fix a buffer overread on odd input resolutions (Loren Merritt)
git version control
git at videolan.org
Tue Feb 23 18:56:59 CET 2010
x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Tue Feb 23 17:55:18 2010 +0000| [8918e835cf33f8204734652a4675ed3e85b75e82] | committer: Loren Merritt
fix a buffer overread on odd input resolutions
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=8918e835cf33f8204734652a4675ed3e85b75e82
---
common/mc.c | 4 ++--
common/x86/mc-a2.asm | 50 +++++++++++++++++++-------------------------------
common/x86/mc-c.c | 18 ++++++++++++++++--
3 files changed, 37 insertions(+), 35 deletions(-)
diff --git a/common/mc.c b/common/mc.c
index ac740cf..d062af3 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -317,7 +317,7 @@ MC_COPY( 16 )
MC_COPY( 8 )
MC_COPY( 4 )
-static void plane_copy( uint8_t *dst, int i_dst,
+void x264_plane_copy_c( uint8_t *dst, int i_dst,
uint8_t *src, int i_src, int w, int h)
{
while( h-- )
@@ -483,7 +483,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
pf->copy[PIXEL_8x8] = mc_copy_w8;
pf->copy[PIXEL_4x4] = mc_copy_w4;
- pf->plane_copy = plane_copy;
+ pf->plane_copy = x264_plane_copy_c;
pf->hpel_filter = hpel_filter;
pf->prefetch_fenc = prefetch_fenc_null;
diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 6343c58..ab7020e 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -598,47 +598,43 @@ cglobal x264_sfence
ret
;-----------------------------------------------------------------------------
-; void x264_plane_copy_mmxext( uint8_t *dst, int i_dst,
-; uint8_t *src, int i_src, int w, int h)
+; void x264_plane_copy_core_mmxext( uint8_t *dst, int i_dst,
+; uint8_t *src, int i_src, int w, int h)
;-----------------------------------------------------------------------------
-cglobal x264_plane_copy_mmxext, 6,7
+; assumes i_dst and w are multiples of 16, and i_dst>w
+cglobal x264_plane_copy_core_mmxext, 6,7
movsxdifnidn r1, r1d
movsxdifnidn r3, r3d
- add r4d, 3
- and r4d, ~3
- mov r6d, r4d
- and r6d, ~15
- sub r1, r6
- sub r3, r6
+ sub r1, r4
+ sub r3, r4
.loopy:
mov r6d, r4d
- sub r6d, 64
- jl .endx
+ cmp r6d, 64
+ jl .loop16
.loopx:
prefetchnta [r2+256]
movq mm0, [r2 ]
movq mm1, [r2+ 8]
- movq mm2, [r2+16]
- movq mm3, [r2+24]
- movq mm4, [r2+32]
- movq mm5, [r2+40]
- movq mm6, [r2+48]
- movq mm7, [r2+56]
movntq [r0 ], mm0
movntq [r0+ 8], mm1
+ movq mm2, [r2+16]
+ movq mm3, [r2+24]
movntq [r0+16], mm2
movntq [r0+24], mm3
+ movq mm4, [r2+32]
+ movq mm5, [r2+40]
movntq [r0+32], mm4
movntq [r0+40], mm5
+ movq mm6, [r2+48]
+ movq mm7, [r2+56]
movntq [r0+48], mm6
movntq [r0+56], mm7
add r2, 64
add r0, 64
sub r6d, 64
- jge .loopx
-.endx:
+ jg .loopx
prefetchnta [r2+256]
- add r6d, 48
+ cmp r6d, 16
jl .end16
.loop16:
movq mm0, [r2 ]
@@ -648,20 +644,12 @@ cglobal x264_plane_copy_mmxext, 6,7
add r2, 16
add r0, 16
sub r6d, 16
- jge .loop16
+ jg .loop16
.end16:
- add r6d, 12
- jl .end4
-.loop4:
- movd mm2, [r2+r6]
- movd [r0+r6], mm2
- sub r6d, 4
- jge .loop4
-.end4:
- add r2, r3
add r0, r1
+ add r2, r3
dec r5d
- jg .loopy
+ jg .loopy
sfence
emms
RET
diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
index b3683a3..fd04392 100644
--- a/common/x86/mc-c.c
+++ b/common/x86/mc-c.c
@@ -88,7 +88,8 @@ extern void x264_mc_chroma_ssse3( uint8_t *src, int i_src_stride,
extern void x264_mc_chroma_ssse3_cache64( uint8_t *src, int i_src_stride,
uint8_t *dst, int i_dst_stride,
int dx, int dy, int i_width, int i_height );
-extern void x264_plane_copy_mmxext( uint8_t *, int, uint8_t *, int, int w, int h);
+extern void x264_plane_copy_core_mmxext( uint8_t *, int, uint8_t *, int, int w, int h);
+extern void x264_plane_copy_c( uint8_t *, int, uint8_t *, int, int w, int h);
extern void *x264_memcpy_aligned_mmx( void * dst, const void * src, size_t n );
extern void *x264_memcpy_aligned_sse2( void * dst, const void * src, size_t n );
extern void x264_memzero_aligned_mmx( void * dst, int n );
@@ -339,10 +340,23 @@ void x264_hpel_filter_ssse3( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_
#else
HPEL(16, sse2, sse2, sse2, sse2)
HPEL(16, ssse3, ssse3, ssse3, ssse3)
-
#endif
HPEL(16, sse2_misalign, sse2, sse2_misalign, sse2)
+static void x264_plane_copy_mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int w, int h)
+{
+ if( w < 256 ) { // tiny resolutions don't want non-temporal hints. dunno the exact threshold.
+ x264_plane_copy_c( dst, i_dst, src, i_src, w, h );
+ } else if(i_src > 0) {
+ // have to use plain memcpy on the last line (in memory order) to avoid overreading src
+ x264_plane_copy_core_mmxext( dst, i_dst, src, i_src, (w+15)&~15, h-1 );
+ memcpy( dst+i_dst*(h-1), src+i_src*(h-1), w );
+ } else {
+ memcpy( dst, src, w );
+ x264_plane_copy_core_mmxext( dst+i_dst, i_dst, src+i_src, i_src, (w+15)&~15, h-1 );
+ }
+}
+
void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
{
if( !(cpu&X264_CPU_MMX) )
More information about the x264-devel mailing list