[x264-devel] Split prefetch_fenc between colorspaces
Jason Garrett-Glaser
git at videolan.org
Sat Oct 22 02:30:28 CEST 2011
x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Tue Oct 18 14:14:03 2011 -0700| [81a99842b76834c11a46438f354d7f2a9e89752a] | committer: Jason Garrett-Glaser
Split prefetch_fenc between colorspaces
Add 4:2:2 version.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=81a99842b76834c11a46438f354d7f2a9e89752a
---
common/arm/mc-c.c | 3 ++-
common/mc.c | 3 ++-
common/mc.h | 4 ++++
common/x86/mc-a.asm | 25 +++++++++++++++++++++----
common/x86/mc-c.c | 6 ++++--
encoder/encoder.c | 3 +++
6 files changed, 36 insertions(+), 8 deletions(-)
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index c437dd3..c1fc05c 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -210,7 +210,8 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
return;
#if !HIGH_BIT_DEPTH
- pf->prefetch_fenc = x264_prefetch_fenc_arm;
+ pf->prefetch_fenc_420 = x264_prefetch_fenc_arm;
+ pf->prefetch_fenc_422 = x264_prefetch_fenc_arm; /* FIXME */
pf->prefetch_ref = x264_prefetch_ref_arm;
#endif // !HIGH_BIT_DEPTH
diff --git a/common/mc.c b/common/mc.c
index c2b77f5..6f772af 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -506,7 +506,8 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
pf->hpel_filter = hpel_filter;
- pf->prefetch_fenc = prefetch_fenc_null;
+ pf->prefetch_fenc_420 = prefetch_fenc_null;
+ pf->prefetch_fenc_422 = prefetch_fenc_null;
pf->prefetch_ref = prefetch_ref_null;
pf->memcpy_aligned = memcpy;
pf->memzero_aligned = memzero_aligned;
diff --git a/common/mc.h b/common/mc.h
index 09dda55..40fb591 100644
--- a/common/mc.h
+++ b/common/mc.h
@@ -103,6 +103,10 @@ typedef struct
/* prefetch the next few macroblocks of fenc or fdec */
void (*prefetch_fenc)( pixel *pix_y, int stride_y,
pixel *pix_uv, int stride_uv, int mb_x );
+ void (*prefetch_fenc_420)( pixel *pix_y, int stride_y,
+ pixel *pix_uv, int stride_uv, int mb_x );
+ void (*prefetch_fenc_422)( pixel *pix_y, int stride_y,
+ pixel *pix_uv, int stride_uv, int mb_x );
/* prefetch the next few macroblocks of a hpel reference frame */
void (*prefetch_ref)( pixel *pix, int stride, int parity );
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index a1b1cb0..2dd587e 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -1302,9 +1302,10 @@ MC_COPY 16
; void prefetch_fenc( pixel *pix_y, int stride_y,
; pixel *pix_uv, int stride_uv, int mb_x )
;-----------------------------------------------------------------------------
-INIT_MMX
+
+%macro PREFETCH_FENC 1
%ifdef ARCH_X86_64
-cglobal prefetch_fenc_mmx2, 5,5
+cglobal prefetch_fenc_%1, 5,5
FIX_STRIDES r1d, r3d
and r4d, 3
mov eax, r4d
@@ -1320,10 +1321,15 @@ cglobal prefetch_fenc_mmx2, 5,5
lea r2, [r2+rax*2+64*SIZEOF_PIXEL]
prefetcht0 [r2]
prefetcht0 [r2+r3]
+%ifidn %1, 422
+ lea r2, [r2+r3*2]
+ prefetcht0 [r2]
+ prefetcht0 [r2+r3]
+%endif
RET
%else
-cglobal prefetch_fenc_mmx2, 0,3
+cglobal prefetch_fenc_%1, 0,3
mov r2, r4m
mov r1, r1m
mov r0, r0m
@@ -1346,13 +1352,24 @@ cglobal prefetch_fenc_mmx2, 0,3
lea r0, [r0+r2*2+64*SIZEOF_PIXEL]
prefetcht0 [r0]
prefetcht0 [r0+r1]
+%ifidn %1, 422
+ lea r0, [r0+r1*2]
+ prefetcht0 [r0]
+ prefetcht0 [r0+r1]
+%endif
ret
%endif ; ARCH_X86_64
+%endmacro
+
+INIT_MMX mmx2
+PREFETCH_FENC 420
+PREFETCH_FENC 422
;-----------------------------------------------------------------------------
; void prefetch_ref( pixel *pix, int stride, int parity )
;-----------------------------------------------------------------------------
-cglobal prefetch_ref_mmx2, 3,3
+INIT_MMX mmx2
+cglobal prefetch_ref, 3,3
FIX_STRIDES r1d
dec r2d
and r2d, r1d
diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
index 6d74780..1700f90 100644
--- a/common/x86/mc-c.c
+++ b/common/x86/mc-c.c
@@ -86,7 +86,8 @@ void x264_mc_copy_w8_sse2( pixel *, int, pixel *, int, int );
void x264_mc_copy_w16_mmx( pixel *, int, pixel *, int, int );
void x264_mc_copy_w16_sse2( pixel *, int, pixel *, int, int );
void x264_mc_copy_w16_aligned_sse2( pixel *, int, pixel *, int, int );
-void x264_prefetch_fenc_mmx2( pixel *, int, pixel *, int, int );
+void x264_prefetch_fenc_420_mmx2( pixel *, int, pixel *, int, int );
+void x264_prefetch_fenc_422_mmx2( pixel *, int, pixel *, int, int );
void x264_prefetch_ref_mmx2( pixel *, int, int );
void x264_plane_copy_core_mmx2( pixel *, int, pixel *, int, int w, int h);
void x264_plane_copy_c( pixel *, int, pixel *, int, int w, int h );
@@ -517,7 +518,8 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
if( !(cpu&X264_CPU_MMX2) )
return;
- pf->prefetch_fenc = x264_prefetch_fenc_mmx2;
+ pf->prefetch_fenc_420 = x264_prefetch_fenc_420_mmx2;
+ pf->prefetch_fenc_422 = x264_prefetch_fenc_422_mmx2;
pf->prefetch_ref = x264_prefetch_ref_mmx2;
pf->plane_copy = x264_plane_copy_mmx2;
diff --git a/encoder/encoder.c b/encoder/encoder.c
index fa1b54f..72d716f 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -980,6 +980,7 @@ static void chroma_dsp_init( x264_t *h )
{
case CHROMA_420:
memcpy( h->predict_chroma, h->predict_8x8c, sizeof(h->predict_chroma) );
+ h->mc.prefetch_fenc = h->mc.prefetch_fenc_420;
h->loopf.deblock_chroma[0] = h->loopf.deblock_h_chroma_420;
h->loopf.deblock_chroma_intra[0] = h->loopf.deblock_h_chroma_420_intra;
h->loopf.deblock_chroma_mbaff = h->loopf.deblock_chroma_420_mbaff;
@@ -990,6 +991,7 @@ static void chroma_dsp_init( x264_t *h )
break;
case CHROMA_422:
memcpy( h->predict_chroma, h->predict_8x16c, sizeof(h->predict_chroma) );
+ h->mc.prefetch_fenc = h->mc.prefetch_fenc_422;
h->loopf.deblock_chroma[0] = h->loopf.deblock_h_chroma_422;
h->loopf.deblock_chroma_intra[0] = h->loopf.deblock_h_chroma_422_intra;
h->loopf.deblock_chroma_mbaff = h->loopf.deblock_chroma_422_mbaff;
@@ -999,6 +1001,7 @@ static void chroma_dsp_init( x264_t *h )
h->quantf.coeff_level_run[DCT_CHROMA_DC] = h->quantf.coeff_level_run8;
break;
case CHROMA_444:
+ h->mc.prefetch_fenc = h->mc.prefetch_fenc_422; /* FIXME: doesn't cover V plane */
h->loopf.deblock_chroma_mbaff = h->loopf.deblock_luma_mbaff;
h->loopf.deblock_chroma_intra_mbaff = h->loopf.deblock_luma_intra_mbaff;
break;
More information about the x264-devel mailing list