[x264-devel] commit: use x264_mc_copy_w16_sse2 in mc.copy, it was previously only in mc_luma (Loren Merritt)
git version control
git at videolan.org
Fri Mar 21 03:36:17 CET 2008
x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Thu Mar 20 19:35:54 2008 -0600| [32fb497fe346d3383937a05ee99a031c13a3ac4d]
use x264_mc_copy_w16_sse2 in mc.copy, it was previously only in mc_luma
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=32fb497fe346d3383937a05ee99a031c13a3ac4d
---
common/x86/mc-a.asm | 15 ++++++++++-----
common/x86/mc-c.c | 2 ++
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 0296fbd..b262e39 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -341,14 +341,15 @@ cglobal x264_mc_copy_w16_mmx, 5,7
jg .height_loop
REP_RET
-cglobal x264_mc_copy_w16_sse2, 5,7
+%macro COPY_W16_SSE2 2
+cglobal %1, 5,7
lea r6, [r3*3]
lea r5, [r1*3]
.height_loop:
- movdqu xmm0, [r2]
- movdqu xmm1, [r2+r3]
- movdqu xmm2, [r2+r3*2]
- movdqu xmm3, [r2+r6]
+ %2 xmm0, [r2]
+ %2 xmm1, [r2+r3]
+ %2 xmm2, [r2+r3*2]
+ %2 xmm3, [r2+r6]
movdqa [r0], xmm0
movdqa [r0+r1], xmm1
movdqa [r0+r1*2], xmm2
@@ -358,6 +359,10 @@ cglobal x264_mc_copy_w16_sse2, 5,7
sub r4d, 4
jg .height_loop
REP_RET
+%endmacro
+
+COPY_W16_SSE2 x264_mc_copy_w16_sse2, movdqu
+COPY_W16_SSE2 x264_mc_copy_w16_aligned_sse2, movdqa
diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
index 5d855de..9c53451 100644
--- a/common/x86/mc-c.c
+++ b/common/x86/mc-c.c
@@ -49,6 +49,7 @@ extern void x264_mc_copy_w4_mmx( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w8_mmx( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w16_mmx( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w16_sse2( uint8_t *, int, uint8_t *, int, int );
+extern void x264_mc_copy_w16_aligned_sse2( uint8_t *, int, uint8_t *, int, int );
extern void x264_pixel_avg_weight_4x4_mmxext( uint8_t *, int, uint8_t *, int, int );
extern void x264_pixel_avg_weight_w8_mmxext( uint8_t *, int, uint8_t *, int, int, int );
extern void x264_pixel_avg_weight_w16_mmxext( uint8_t *, int, uint8_t *, int, int, int );
@@ -215,6 +216,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
pf->mc_luma = mc_luma_sse2;
pf->get_ref = get_ref_sse2;
+ pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse2;
pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_sse2;
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_sse2;
}
More information about the x264-devel mailing list