[x264-devel] Fix some store forwarding stalls
Jason Garrett-Glaser
git at videolan.org
Wed Feb 27 00:18:06 CET 2013
x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Wed Feb 6 16:55:39 2013 -0800| [9d600d64194e0b2a77a8d9aa3f05b141cf473af0] | committer: Jason Garrett-Glaser
Fix some store forwarding stalls
There are quite a few others, but fixing most of them doesn't help, or there's no
easy way to avoid them.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=9d600d64194e0b2a77a8d9aa3f05b141cf473af0
---
common/x86/pixel-a.asm | 4 ++--
encoder/slicetype.c | 13 +++++++------
2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index 97adec5..bf0d27a 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -4410,8 +4410,8 @@ cglobal pixel_ads4, 5,7,12
punpckhqdq xmm5, xmm5
punpckhqdq xmm4, xmm4
%if ARCH_X86_64
- pshuflw xmm8, r6m, 0
- punpcklqdq xmm8, xmm8
+ movd xmm8, r6m
+ SPLATW xmm8, xmm8
ADS_START
movdqu xmm10, [r1]
movdqu xmm11, [r1+r2]
diff --git a/encoder/slicetype.c b/encoder/slicetype.c
index 99973b9..8a6c226 100644
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -633,15 +633,16 @@ lowres_intra_mb:
if( !fenc->b_intra_calculated )
{
ALIGNED_ARRAY_16( pixel, edge,[36] );
- pixel *pix = &pix1[8+FDEC_STRIDE - 1];
- pixel *src = &fenc->lowres[0][i_pel_offset - 1];
+ pixel *pix = &pix1[8+FDEC_STRIDE];
+ pixel *src = &fenc->lowres[0][i_pel_offset];
const int intra_penalty = 5 * a->i_lambda;
int satds[3];
+ int pixoff = 4 / sizeof(pixel);
- memcpy( pix-FDEC_STRIDE, src-i_stride, 17 * sizeof(pixel) );
- for( int i = 0; i < 8; i++ )
- pix[i*FDEC_STRIDE] = src[i*i_stride];
- pix++;
+ /* Avoid store forwarding stalls by writing larger chunks */
+ memcpy( pix-FDEC_STRIDE, src-i_stride, 16 * sizeof(pixel) );
+ for( int i = -1; i < 8; i++ )
+ M32( &pix[i*FDEC_STRIDE-pixoff] ) = M32( &src[i*i_stride-pixoff] );
h->pixf.intra_mbcmp_x3_8x8c( h->mb.pic.p_fenc[0], pix, satds );
int i_icost = X264_MIN3( satds[0], satds[1], satds[2] );
More information about the x264-devel
mailing list