[x264-devel] [PATCHv3 1/1] nv21 input support

Yu Xiaolei dreifachstein at gmail.com
Fri Jun 6 10:05:27 CEST 2014


nv21 input support

This eliminates an extra copy when recording from android camera preview data.

Checkasm test by Janne Grunau.
Assembly with improvements from Janne Grunau.
---
 common/arm/mc-a.S      | 24 ++++++++++++++++++++++++
 common/arm/mc-c.c      |  3 +++
 common/frame.c         |  8 ++++++++
 common/mc.c            | 12 ++++++++++++
 common/mc.h            |  1 +
 encoder/encoder.c      |  2 +-
 filters/video/resize.c |  1 +
 input/input.c          |  1 +
 tools/checkasm.c       | 26 ++++++++++++++++++++++++++
 x264.h                 | 25 +++++++++++++------------
 10 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 330b852..4218fc4 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1597,6 +1597,30 @@ blocki:
     pop             {r4-r7, pc}
 .endfunc
 
+function x264_plane_copy_swap_neon
+    push            {r4-r5, lr}
+    ldrd            r4, r5, [sp, #12]
+    add             lr,  r4,  #15
+    bic             lr,  lr,  #15
+    sub             r1,  r1,  lr, lsl #1
+    sub             r3,  r3,  lr, lsl #1
+1:
+    vld1.8          {q0, q1}, [r2]!
+    subs            lr,  lr,  #16
+    vrev16.8        q0,  q0
+    vrev16.8        q1,  q1
+    vst1.8          {q0, q1}, [r0]!
+    bgt             1b
+
+    subs            r5,  r5,  #1
+    add             r0,  r0,  r1
+    add             r2,  r2,  r3
+    mov             lr,  r4
+    bgt             1b
+
+    pop             {r4-r5, pc}
+.endfunc
+
 function x264_store_interleave_chroma_neon
     push            {lr}
     ldr             lr,  [sp, #4]
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index 3805e73..73623c0 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -57,6 +57,8 @@ void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
 void x264_plane_copy_interleave_neon( pixel *dst,  intptr_t i_dst,
                                       pixel *srcu, intptr_t i_srcu,
                                       pixel *srcv, intptr_t i_srcv, int w, int h );
+void x264_plane_copy_swap_neon( pixel *dst, intptr_t i_dst,
+                                pixel *src, intptr_t i_src, int w, int h);
 
 void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
 void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
@@ -243,6 +245,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
     pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
+    pf->plane_copy_swap = x264_plane_copy_swap_neon;
 
     pf->store_interleave_chroma = x264_store_interleave_chroma_neon;
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
diff --git a/common/frame.c b/common/frame.c
index a845181..871ad15 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -49,6 +49,7 @@ static int x264_frame_internal_csp( int external_csp )
         case X264_CSP_NV12:
         case X264_CSP_I420:
         case X264_CSP_YV12:
+        case X264_CSP_NV21:
             return X264_CSP_NV12;
         case X264_CSP_NV16:
         case X264_CSP_I422:
@@ -435,6 +436,13 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
             h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
                               stride[1]/sizeof(pixel), h->param.i_width, h->param.i_height>>v_shift );
         }
+        else if ( i_csp == X264_CSP_NV21 )
+        {
+            get_plane_ptr( h, src, &pix[1], &stride[1], 1, 0, v_shift );
+            h->mc.plane_copy_swap( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
+                                   stride[1]/sizeof(pixel), h->param.i_width>>1, h->param.i_height>>v_shift );
+
+        }
         else if( i_csp == X264_CSP_I420 || i_csp == X264_CSP_I422 || i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16 )
         {
             int uv_swap = i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16;
diff --git a/common/mc.c b/common/mc.c
index 6797f0a..ead4a6f 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -296,6 +296,17 @@ void x264_plane_copy_c( pixel *dst, intptr_t i_dst,
     }
 }
 
+void x264_plane_copy_swap_c( pixel *dst, intptr_t i_dst,
+                             pixel *src, intptr_t i_src, int w, int h )
+{
+    for( int y=0; y<h; y++, dst+=i_dst, src+=i_src)
+        for ( int x=0; x<w; x++ )
+        {
+            dst[2*x]    = src[2*x+1];
+            dst[2*x+1]  = src[2*x];
+        }
+}
+
 void x264_plane_copy_interleave_c( pixel *dst,  intptr_t i_dst,
                                    pixel *srcu, intptr_t i_srcu,
                                    pixel *srcv, intptr_t i_srcv, int w, int h )
@@ -609,6 +620,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
     pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec;
 
     pf->plane_copy = x264_plane_copy_c;
+    pf->plane_copy_swap = x264_plane_copy_swap_c;
     pf->plane_copy_interleave = x264_plane_copy_interleave_c;
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
diff --git a/common/mc.h b/common/mc.h
index 1e97499..eb0bc9f 100644
--- a/common/mc.h
+++ b/common/mc.h
@@ -86,6 +86,7 @@ typedef struct
     void (*load_deinterleave_chroma_fdec)( pixel *dst, pixel *src, intptr_t i_src, int height );
 
     void (*plane_copy)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
+    void (*plane_copy_swap)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
     void (*plane_copy_interleave)( pixel *dst,  intptr_t i_dst, pixel *srcu, intptr_t i_srcu,
                                    pixel *srcv, intptr_t i_srcv, int w, int h );
     /* may write up to 15 pixels off the end of each plane */
diff --git a/encoder/encoder.c b/encoder/encoder.c
index fad8b3d..56d5fd7 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -480,7 +480,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
 #endif
     if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
     {
-        x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
+        x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/NV21/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
         return -1;
     }
 
diff --git a/filters/video/resize.c b/filters/video/resize.c
index 79fc89a..012e9c8 100644
--- a/filters/video/resize.c
+++ b/filters/video/resize.c
@@ -156,6 +156,7 @@ static int convert_csp_to_pix_fmt( int csp )
         case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64    : AV_PIX_FMT_BGRA;
         /* the next csp has no equivalent 16bit depth in swscale */
         case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE      : AV_PIX_FMT_NV12;
+        case X264_CSP_NV21: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE      : AV_PIX_FMT_NV21;
         /* the next csp is no supported by swscale at all */
         case X264_CSP_NV16:
         default:            return AV_PIX_FMT_NONE;
diff --git a/input/input.c b/input/input.c
index c6bb5ac..5fd4257 100644
--- a/input/input.c
+++ b/input/input.c
@@ -33,6 +33,7 @@ const x264_cli_csp_t x264_cli_csps[] = {
     [X264_CSP_YV16] = { "yv16", 3, { 1, .5, .5 }, { 1,  1,  1 }, 2, 1 },
     [X264_CSP_YV24] = { "yv24", 3, { 1,  1,  1 }, { 1,  1,  1 }, 1, 1 },
     [X264_CSP_NV12] = { "nv12", 2, { 1,  1 },     { 1, .5 },     2, 2 },
+    [X264_CSP_NV21] = { "nv21", 2, { 1,  1 },     { 1, .5 },     2, 2 },
     [X264_CSP_NV16] = { "nv16", 2, { 1,  1 },     { 1,  1 },     2, 1 },
     [X264_CSP_BGR]  = { "bgr",  1, { 3 },         { 1 },         1, 1 },
     [X264_CSP_BGRA] = { "bgra", 1, { 4 },         { 1 },         1, 1 },
diff --git a/tools/checkasm.c b/tools/checkasm.c
index f72b7a0..d2099cb 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -1400,6 +1400,32 @@ static int check_mc( int cpu_ref, int cpu_new )
         }
     }
 
+    if( mc_a.plane_copy_swap != mc_ref.plane_copy_swap )
+    {
+        set_func_name( "plane_copy_swap" );
+        used_asm = 1;
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+        {
+            int w = (plane_specs[i].w + 1) >> 1;
+            int h = plane_specs[i].h;
+            intptr_t src_stride = plane_specs[i].src_stride;
+            intptr_t dst_stride = (2*w + 127) & ~63;
+            assert( dst_stride * h <= 0x1000 );
+            pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1);
+            memset( pbuf3, 0, 0x1000*sizeof(pixel) );
+            memset( pbuf4, 0, 0x1000*sizeof(pixel) );
+            call_c( mc_c.plane_copy_swap, pbuf3, dst_stride, src1, src_stride, w, h );
+            call_a( mc_a.plane_copy_swap, pbuf4, dst_stride, src1, src_stride, w, h );
+            for( int y = 0; y < h; y++ )
+                if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, w*sizeof(pixel) ) )
+                {
+                    ok = 0;
+                    fprintf( stderr, "plane_copy_swap FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
+                    break;
+                }
+        }
+    }
+
     if( mc_a.plane_copy_interleave != mc_ref.plane_copy_interleave )
     {
         set_func_name( "plane_copy_interleave" );
diff --git a/x264.h b/x264.h
index b94e4a9..388291c 100644
--- a/x264.h
+++ b/x264.h
@@ -41,7 +41,7 @@
 
 #include "x264_config.h"
 
-#define X264_BUILD 142
+#define X264_BUILD 143
 
 /* Application developers planning to link against a shared library version of
  * libx264 from a Microsoft Visual Studio or similar development environment
@@ -211,17 +211,18 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
 #define X264_CSP_NONE           0x0000  /* Invalid mode     */
 #define X264_CSP_I420           0x0001  /* yuv 4:2:0 planar */
 #define X264_CSP_YV12           0x0002  /* yvu 4:2:0 planar */
-#define X264_CSP_NV12           0x0003  /* yuv 4:2:0, with one y plane and one packed u+v */
-#define X264_CSP_I422           0x0004  /* yuv 4:2:2 planar */
-#define X264_CSP_YV16           0x0005  /* yvu 4:2:2 planar */
-#define X264_CSP_NV16           0x0006  /* yuv 4:2:2, with one y plane and one packed u+v */
-#define X264_CSP_V210           0x0007  /* 10-bit yuv 4:2:2 packed in 32 */
-#define X264_CSP_I444           0x0008  /* yuv 4:4:4 planar */
-#define X264_CSP_YV24           0x0009  /* yvu 4:4:4 planar */
-#define X264_CSP_BGR            0x000a  /* packed bgr 24bits   */
-#define X264_CSP_BGRA           0x000b  /* packed bgr 32bits   */
-#define X264_CSP_RGB            0x000c  /* packed rgb 24bits   */
-#define X264_CSP_MAX            0x000d  /* end of list */
+#define X264_CSP_NV21           0x0003  /* yuv 4:2:0, with one y plane and one packed v+u */
+#define X264_CSP_NV12           0x0004  /* yuv 4:2:0, with one y plane and one packed u+v */
+#define X264_CSP_I422           0x0005  /* yuv 4:2:2 planar */
+#define X264_CSP_YV16           0x0006  /* yvu 4:2:2 planar */
+#define X264_CSP_NV16           0x0007  /* yuv 4:2:2, with one y plane and one packed u+v */
+#define X264_CSP_V210           0x0008  /* 10-bit yuv 4:2:2 packed in 32 */
+#define X264_CSP_I444           0x0009  /* yuv 4:4:4 planar */
+#define X264_CSP_YV24           0x000a  /* yvu 4:4:4 planar */
+#define X264_CSP_BGR            0x000b  /* packed bgr 24bits   */
+#define X264_CSP_BGRA           0x000c  /* packed bgr 32bits   */
+#define X264_CSP_RGB            0x000d  /* packed rgb 24bits   */
+#define X264_CSP_MAX            0x000e  /* end of list */
 #define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
 #define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
 
-- 
2.0.0



More information about the x264-devel mailing list