[x264-devel] Split prefetch_fenc between colorspaces

Jason Garrett-Glaser git at videolan.org
Sat Oct 22 02:30:28 CEST 2011


x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Tue Oct 18 14:14:03 2011 -0700| [81a99842b76834c11a46438f354d7f2a9e89752a] | committer: Jason Garrett-Glaser

Split prefetch_fenc between colorspaces
Add 4:2:2 version.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=81a99842b76834c11a46438f354d7f2a9e89752a
---

 common/arm/mc-c.c   |    3 ++-
 common/mc.c         |    3 ++-
 common/mc.h         |    4 ++++
 common/x86/mc-a.asm |   25 +++++++++++++++++++++----
 common/x86/mc-c.c   |    6 ++++--
 encoder/encoder.c   |    3 +++
 6 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index c437dd3..c1fc05c 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -210,7 +210,8 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
         return;
 
 #if !HIGH_BIT_DEPTH
-    pf->prefetch_fenc = x264_prefetch_fenc_arm;
+    pf->prefetch_fenc_420 = x264_prefetch_fenc_arm;
+    pf->prefetch_fenc_422 = x264_prefetch_fenc_arm; /* FIXME */
     pf->prefetch_ref  = x264_prefetch_ref_arm;
 #endif // !HIGH_BIT_DEPTH
 
diff --git a/common/mc.c b/common/mc.c
index c2b77f5..6f772af 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -506,7 +506,8 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
 
     pf->hpel_filter = hpel_filter;
 
-    pf->prefetch_fenc = prefetch_fenc_null;
+    pf->prefetch_fenc_420 = prefetch_fenc_null;
+    pf->prefetch_fenc_422 = prefetch_fenc_null;
     pf->prefetch_ref  = prefetch_ref_null;
     pf->memcpy_aligned = memcpy;
     pf->memzero_aligned = memzero_aligned;
diff --git a/common/mc.h b/common/mc.h
index 09dda55..40fb591 100644
--- a/common/mc.h
+++ b/common/mc.h
@@ -103,6 +103,10 @@ typedef struct
     /* prefetch the next few macroblocks of fenc or fdec */
     void (*prefetch_fenc)( pixel *pix_y, int stride_y,
                            pixel *pix_uv, int stride_uv, int mb_x );
+    void (*prefetch_fenc_420)( pixel *pix_y, int stride_y,
+                               pixel *pix_uv, int stride_uv, int mb_x );
+    void (*prefetch_fenc_422)( pixel *pix_y, int stride_y,
+                               pixel *pix_uv, int stride_uv, int mb_x );
     /* prefetch the next few macroblocks of a hpel reference frame */
     void (*prefetch_ref)( pixel *pix, int stride, int parity );
 
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index a1b1cb0..2dd587e 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -1302,9 +1302,10 @@ MC_COPY 16
 ; void prefetch_fenc( pixel *pix_y, int stride_y,
 ;                     pixel *pix_uv, int stride_uv, int mb_x )
 ;-----------------------------------------------------------------------------
-INIT_MMX
+
+%macro PREFETCH_FENC 1
 %ifdef ARCH_X86_64
-cglobal prefetch_fenc_mmx2, 5,5
+cglobal prefetch_fenc_%1, 5,5
     FIX_STRIDES r1d, r3d
     and    r4d, 3
     mov    eax, r4d
@@ -1320,10 +1321,15 @@ cglobal prefetch_fenc_mmx2, 5,5
     lea    r2,  [r2+rax*2+64*SIZEOF_PIXEL]
     prefetcht0  [r2]
     prefetcht0  [r2+r3]
+%ifidn %1, 422
+    lea    r2,  [r2+r3*2]
+    prefetcht0  [r2]
+    prefetcht0  [r2+r3]
+%endif
     RET
 
 %else
-cglobal prefetch_fenc_mmx2, 0,3
+cglobal prefetch_fenc_%1, 0,3
     mov    r2, r4m
     mov    r1, r1m
     mov    r0, r0m
@@ -1346,13 +1352,24 @@ cglobal prefetch_fenc_mmx2, 0,3
     lea    r0, [r0+r2*2+64*SIZEOF_PIXEL]
     prefetcht0 [r0]
     prefetcht0 [r0+r1]
+%ifidn %1, 422
+    lea    r0,  [r0+r1*2]
+    prefetcht0  [r0]
+    prefetcht0  [r0+r1]
+%endif
     ret
 %endif ; ARCH_X86_64
+%endmacro
+
+INIT_MMX mmx2
+PREFETCH_FENC 420
+PREFETCH_FENC 422
 
 ;-----------------------------------------------------------------------------
 ; void prefetch_ref( pixel *pix, int stride, int parity )
 ;-----------------------------------------------------------------------------
-cglobal prefetch_ref_mmx2, 3,3
+INIT_MMX mmx2
+cglobal prefetch_ref, 3,3
     FIX_STRIDES r1d
     dec    r2d
     and    r2d, r1d
diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
index 6d74780..1700f90 100644
--- a/common/x86/mc-c.c
+++ b/common/x86/mc-c.c
@@ -86,7 +86,8 @@ void x264_mc_copy_w8_sse2( pixel *, int, pixel *, int, int );
 void x264_mc_copy_w16_mmx( pixel *, int, pixel *, int, int );
 void x264_mc_copy_w16_sse2( pixel *, int, pixel *, int, int );
 void x264_mc_copy_w16_aligned_sse2( pixel *, int, pixel *, int, int );
-void x264_prefetch_fenc_mmx2( pixel *, int, pixel *, int, int );
+void x264_prefetch_fenc_420_mmx2( pixel *, int, pixel *, int, int );
+void x264_prefetch_fenc_422_mmx2( pixel *, int, pixel *, int, int );
 void x264_prefetch_ref_mmx2( pixel *, int, int );
 void x264_plane_copy_core_mmx2( pixel *, int, pixel *, int, int w, int h);
 void x264_plane_copy_c( pixel *, int, pixel *, int, int w, int h );
@@ -517,7 +518,8 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
     if( !(cpu&X264_CPU_MMX2) )
         return;
 
-    pf->prefetch_fenc = x264_prefetch_fenc_mmx2;
+    pf->prefetch_fenc_420 = x264_prefetch_fenc_420_mmx2;
+    pf->prefetch_fenc_422 = x264_prefetch_fenc_422_mmx2;
     pf->prefetch_ref  = x264_prefetch_ref_mmx2;
 
     pf->plane_copy = x264_plane_copy_mmx2;
diff --git a/encoder/encoder.c b/encoder/encoder.c
index fa1b54f..72d716f 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -980,6 +980,7 @@ static void chroma_dsp_init( x264_t *h )
     {
         case CHROMA_420:
             memcpy( h->predict_chroma, h->predict_8x8c, sizeof(h->predict_chroma) );
+            h->mc.prefetch_fenc = h->mc.prefetch_fenc_420;
             h->loopf.deblock_chroma[0] = h->loopf.deblock_h_chroma_420;
             h->loopf.deblock_chroma_intra[0] = h->loopf.deblock_h_chroma_420_intra;
             h->loopf.deblock_chroma_mbaff = h->loopf.deblock_chroma_420_mbaff;
@@ -990,6 +991,7 @@ static void chroma_dsp_init( x264_t *h )
             break;
         case CHROMA_422:
             memcpy( h->predict_chroma, h->predict_8x16c, sizeof(h->predict_chroma) );
+            h->mc.prefetch_fenc = h->mc.prefetch_fenc_422;
             h->loopf.deblock_chroma[0] = h->loopf.deblock_h_chroma_422;
             h->loopf.deblock_chroma_intra[0] = h->loopf.deblock_h_chroma_422_intra;
             h->loopf.deblock_chroma_mbaff = h->loopf.deblock_chroma_422_mbaff;
@@ -999,6 +1001,7 @@ static void chroma_dsp_init( x264_t *h )
             h->quantf.coeff_level_run[DCT_CHROMA_DC] = h->quantf.coeff_level_run8;
             break;
         case CHROMA_444:
+            h->mc.prefetch_fenc = h->mc.prefetch_fenc_422; /* FIXME: doesn't cover V plane */
             h->loopf.deblock_chroma_mbaff = h->loopf.deblock_luma_mbaff;
             h->loopf.deblock_chroma_intra_mbaff = h->loopf.deblock_luma_intra_mbaff;
             break;



More information about the x264-devel mailing list