[x264-devel] [PATCHv3 1/1] nv21 input support
Yu Xiaolei
dreifachstein at gmail.com
Fri Jun 6 10:05:27 CEST 2014
nv21 input support
This eliminates an extra copy when recording from Android camera preview data.
Checkasm test by Janne Grunau.
Assembly with improvements from Janne Grunau.
---
common/arm/mc-a.S | 24 ++++++++++++++++++++++++
common/arm/mc-c.c | 3 +++
common/frame.c | 8 ++++++++
common/mc.c | 12 ++++++++++++
common/mc.h | 1 +
encoder/encoder.c | 2 +-
filters/video/resize.c | 1 +
input/input.c | 1 +
tools/checkasm.c | 26 ++++++++++++++++++++++++++
x264.h | 25 +++++++++++++------------
10 files changed, 90 insertions(+), 13 deletions(-)
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 330b852..4218fc4 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1597,6 +1597,30 @@ blocki:
pop {r4-r7, pc}
.endfunc
+function x264_plane_copy_swap_neon @ copy h rows of w byte pairs from src to dst, exchanging the two bytes of each pair; r0 = dst, r1 = i_dst, r2 = src, r3 = i_src per the prototype in mc-c.c (AAPCS assumed)
+ push {r4-r5, lr} @ save r4, r5 and the return address (12 bytes)
+ ldrd r4, r5, [sp, #12] @ r4 = w, r5 = h: 5th and 6th arguments, on the stack just above the 12 bytes pushed (AAPCS assumed)
+ add lr, r4, #15
+ bic lr, lr, #15 @ lr = w rounded up to a multiple of 16 pairs
+ sub r1, r1, lr, lsl #1 @ r1 = i_dst minus the bytes stored per row (2 per pair), because r0 is post-incremented by the stores
+ sub r3, r3, lr, lsl #1 @ same adjustment for the source stride, because r2 is post-incremented by the loads
+1: @ chunk loop; also the re-entry point for every new row
+ vld1.8 {q0, q1}, [r2]! @ load 32 source bytes (16 pairs) and advance r2
+ subs lr, lr, #16 @ 16 pairs consumed; sets the flags tested by the bgt below
+ vrev16.8 q0, q0 @ reverse the two bytes inside every 16-bit lane
+ vrev16.8 q1, q1
+ vst1.8 {q0, q1}, [r0]! @ store 32 swapped bytes and advance r0
+ bgt 1b @ repeat while pairs remain; rows are processed in whole 16-pair chunks, so up to 15 pairs past w are read and written
+
+ subs r5, r5, #1 @ one row finished
+ add r0, r0, r1 @ step dst to the start of the next row
+ add r2, r2, r3 @ step src to the start of the next row
+ mov lr, r4 @ restart the pair counter from w (unrounded; for w > 0 the chunk loop still runs ceil(w/16) times)
+ bgt 1b @ loop while rows remain; flags still come from the subs on r5, add and mov leave them untouched
+
+ pop {r4-r5, pc} @ restore r4-r5 and return by popping the saved lr into pc
+.endfunc
+
function x264_store_interleave_chroma_neon
push {lr}
ldr lr, [sp, #4]
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index 3805e73..73623c0 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -57,6 +57,8 @@ void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
void x264_plane_copy_interleave_neon( pixel *dst, intptr_t i_dst,
pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h );
+void x264_plane_copy_swap_neon( pixel *dst, intptr_t i_dst,
+ pixel *src, intptr_t i_src, int w, int h);
void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
@@ -243,6 +245,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
+ pf->plane_copy_swap = x264_plane_copy_swap_neon;
pf->store_interleave_chroma = x264_store_interleave_chroma_neon;
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
diff --git a/common/frame.c b/common/frame.c
index a845181..871ad15 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -49,6 +49,7 @@ static int x264_frame_internal_csp( int external_csp )
case X264_CSP_NV12:
case X264_CSP_I420:
case X264_CSP_YV12:
+ case X264_CSP_NV21:
return X264_CSP_NV12;
case X264_CSP_NV16:
case X264_CSP_I422:
@@ -435,6 +436,13 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
stride[1]/sizeof(pixel), h->param.i_width, h->param.i_height>>v_shift );
}
+ else if ( i_csp == X264_CSP_NV21 )
+ {
+ get_plane_ptr( h, src, &pix[1], &stride[1], 1, 0, v_shift );
+ h->mc.plane_copy_swap( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
+ stride[1]/sizeof(pixel), h->param.i_width>>1, h->param.i_height>>v_shift );
+
+ }
else if( i_csp == X264_CSP_I420 || i_csp == X264_CSP_I422 || i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16 )
{
int uv_swap = i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16;
diff --git a/common/mc.c b/common/mc.c
index 6797f0a..ead4a6f 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -296,6 +296,17 @@ void x264_plane_copy_c( pixel *dst, intptr_t i_dst,
}
}
+/* Copy h rows of w pixel pairs from src to dst, exchanging the two pixels of every pair. */
+void x264_plane_copy_swap_c( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )
+{
+    for( int row = 0; row < h; row++ )
+    {
+        pixel *out = dst + row*i_dst, *in = src + row*i_src; /* i_dst and i_src are the per-row pointer steps */
+        for( int n = 0; n < w; n++, out += 2, in += 2 )
+            out[0] = in[1], out[1] = in[0]; /* pair (a,b) is stored as (b,a) */
+    }
+}
+
void x264_plane_copy_interleave_c( pixel *dst, intptr_t i_dst,
pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h )
@@ -609,6 +620,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec;
pf->plane_copy = x264_plane_copy_c;
+ pf->plane_copy_swap = x264_plane_copy_swap_c;
pf->plane_copy_interleave = x264_plane_copy_interleave_c;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
diff --git a/common/mc.h b/common/mc.h
index 1e97499..eb0bc9f 100644
--- a/common/mc.h
+++ b/common/mc.h
@@ -86,6 +86,7 @@ typedef struct
void (*load_deinterleave_chroma_fdec)( pixel *dst, pixel *src, intptr_t i_src, int height );
void (*plane_copy)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
+ void (*plane_copy_swap)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
void (*plane_copy_interleave)( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h );
/* may write up to 15 pixels off the end of each plane */
diff --git a/encoder/encoder.c b/encoder/encoder.c
index fad8b3d..56d5fd7 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -480,7 +480,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
#endif
if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
{
- x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
+ x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/NV21/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
return -1;
}
diff --git a/filters/video/resize.c b/filters/video/resize.c
index 79fc89a..012e9c8 100644
--- a/filters/video/resize.c
+++ b/filters/video/resize.c
@@ -156,6 +156,7 @@ static int convert_csp_to_pix_fmt( int csp )
case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64 : AV_PIX_FMT_BGRA;
/* the next csp has no equivalent 16bit depth in swscale */
case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV12;
+ case X264_CSP_NV21: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV21;
/* the next csp is no supported by swscale at all */
case X264_CSP_NV16:
default: return AV_PIX_FMT_NONE;
diff --git a/input/input.c b/input/input.c
index c6bb5ac..5fd4257 100644
--- a/input/input.c
+++ b/input/input.c
@@ -33,6 +33,7 @@ const x264_cli_csp_t x264_cli_csps[] = {
[X264_CSP_YV16] = { "yv16", 3, { 1, .5, .5 }, { 1, 1, 1 }, 2, 1 },
[X264_CSP_YV24] = { "yv24", 3, { 1, 1, 1 }, { 1, 1, 1 }, 1, 1 },
[X264_CSP_NV12] = { "nv12", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
+ [X264_CSP_NV21] = { "nv21", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
[X264_CSP_NV16] = { "nv16", 2, { 1, 1 }, { 1, 1 }, 2, 1 },
[X264_CSP_BGR] = { "bgr", 1, { 3 }, { 1 }, 1, 1 },
[X264_CSP_BGRA] = { "bgra", 1, { 4 }, { 1 }, 1, 1 },
diff --git a/tools/checkasm.c b/tools/checkasm.c
index f72b7a0..d2099cb 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -1400,6 +1400,32 @@ static int check_mc( int cpu_ref, int cpu_new )
}
}
+    if( mc_a.plane_copy_swap != mc_ref.plane_copy_swap ) /* nothing to check when mc_a uses the same function as mc_ref */
+    {
+        set_func_name( "plane_copy_swap" );
+        used_asm = 1;
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+        {
+            int w = (plane_specs[i].w + 1) >> 1; /* width in pixel pairs, rounded up */
+            int h = plane_specs[i].h;
+            intptr_t src_stride = plane_specs[i].src_stride;
+            intptr_t dst_stride = (2*w + 127) & ~63; /* multiple of 64 with at least 64 pixels of slack after the 2*w written */
+            assert( dst_stride * h <= 0x1000 ); /* both output buffers are cleared over 0x1000 pixels below */
+            pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1); /* for a negative stride, start high enough that no row precedes pbuf1 */
+            memset( pbuf3, 0, 0x1000*sizeof(pixel) );
+            memset( pbuf4, 0, 0x1000*sizeof(pixel) );
+            call_c( mc_c.plane_copy_swap, pbuf3, dst_stride, src1, src_stride, w, h );
+            call_a( mc_a.plane_copy_swap, pbuf4, dst_stride, src1, src_stride, w, h );
+            for( int y = 0; y < h; y++ ) /* a row holds w pairs = 2*w pixels; compare all of them, not only the first half */
+                if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, 2*w*sizeof(pixel) ) )
+                {
+                    ok = 0;
+                    fprintf( stderr, "plane_copy_swap FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
+                    break;
+                }
+        }
+    }
+
if( mc_a.plane_copy_interleave != mc_ref.plane_copy_interleave )
{
set_func_name( "plane_copy_interleave" );
diff --git a/x264.h b/x264.h
index b94e4a9..388291c 100644
--- a/x264.h
+++ b/x264.h
@@ -41,7 +41,7 @@
#include "x264_config.h"
-#define X264_BUILD 142
+#define X264_BUILD 143
/* Application developers planning to link against a shared library version of
* libx264 from a Microsoft Visual Studio or similar development environment
@@ -211,17 +211,18 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
#define X264_CSP_NONE 0x0000 /* Invalid mode */
#define X264_CSP_I420 0x0001 /* yuv 4:2:0 planar */
#define X264_CSP_YV12 0x0002 /* yvu 4:2:0 planar */
-#define X264_CSP_NV12 0x0003 /* yuv 4:2:0, with one y plane and one packed u+v */
-#define X264_CSP_I422 0x0004 /* yuv 4:2:2 planar */
-#define X264_CSP_YV16 0x0005 /* yvu 4:2:2 planar */
-#define X264_CSP_NV16 0x0006 /* yuv 4:2:2, with one y plane and one packed u+v */
-#define X264_CSP_V210 0x0007 /* 10-bit yuv 4:2:2 packed in 32 */
-#define X264_CSP_I444 0x0008 /* yuv 4:4:4 planar */
-#define X264_CSP_YV24 0x0009 /* yvu 4:4:4 planar */
-#define X264_CSP_BGR 0x000a /* packed bgr 24bits */
-#define X264_CSP_BGRA 0x000b /* packed bgr 32bits */
-#define X264_CSP_RGB 0x000c /* packed rgb 24bits */
-#define X264_CSP_MAX 0x000d /* end of list */
+#define X264_CSP_NV21 0x0003 /* yuv 4:2:0, with one y plane and one packed v+u */
+#define X264_CSP_NV12 0x0004 /* yuv 4:2:0, with one y plane and one packed u+v */
+#define X264_CSP_I422 0x0005 /* yuv 4:2:2 planar */
+#define X264_CSP_YV16 0x0006 /* yvu 4:2:2 planar */
+#define X264_CSP_NV16 0x0007 /* yuv 4:2:2, with one y plane and one packed u+v */
+#define X264_CSP_V210 0x0008 /* 10-bit yuv 4:2:2 packed in 32 */
+#define X264_CSP_I444 0x0009 /* yuv 4:4:4 planar */
+#define X264_CSP_YV24 0x000a /* yvu 4:4:4 planar */
+#define X264_CSP_BGR 0x000b /* packed bgr 24bits */
+#define X264_CSP_BGRA 0x000c /* packed bgr 32bits */
+#define X264_CSP_RGB 0x000d /* packed rgb 24bits */
+#define X264_CSP_MAX 0x000e /* end of list */
#define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */
#define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */
--
2.0.0
More information about the x264-devel
mailing list