[x264-devel] [Git][videolan/x264][master] 2 commits: Fix alignment of chroma buffer for weightp
Anton Mitrofanov
gitlab at videolan.org
Wed Jan 27 16:58:16 UTC 2021
Anton Mitrofanov pushed to branch master at VideoLAN / x264
Commits:
e32bff16 by Anton Mitrofanov at 2021-01-26T21:49:17+03:00
Fix alignment of chroma buffer for weightp
In 10-bit mode pixel_asd8 expects 16-byte alignment for pix1 and pix2.
- - - - -
b3aadb76 by Anton Mitrofanov at 2021-01-26T21:49:17+03:00
Fix PADH alignment
Make pointers to padded buffers aligned both before and after padding.
- - - - -
6 changed files:
- common/frame.c
- common/frame.h
- common/mc.c
- encoder/analyse.c
- encoder/encoder.c
- encoder/slicetype.c
Changes:
=====================================
common/frame.c
=====================================
@@ -38,7 +38,7 @@ static int align_stride( int x, int align, int disalign )
static int align_plane_size( int x, int disalign )
{
if( !(x&(disalign-1)) )
- x += 128;
+ x += X264_MAX( 128, NATIVE_ALIGN ) / SIZEOF_PIXEL;
return x;
}
@@ -63,29 +63,28 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
int i_mb_count = h->mb.i_mb_count;
int i_stride, i_width, i_lines, luma_plane_count;
int i_padv = PADV << PARAM_INTERLACED;
- int align = 16;
+ int align = NATIVE_ALIGN / SIZEOF_PIXEL;
#if ARCH_X86 || ARCH_X86_64
if( h->param.cpu&X264_CPU_CACHELINE_64 || h->param.cpu&X264_CPU_AVX512 )
- align = 64;
+ align = 64 / SIZEOF_PIXEL;
else if( h->param.cpu&X264_CPU_CACHELINE_32 || h->param.cpu&X264_CPU_AVX )
- align = 32;
+ align = 32 / SIZEOF_PIXEL;
+ else
+ align = 16 / SIZEOF_PIXEL;
#endif
#if ARCH_PPC
- int disalign = 1<<9;
+ int disalign = (1<<9) / SIZEOF_PIXEL;
#else
- int disalign = 1<<10;
+ int disalign = (1<<10) / SIZEOF_PIXEL;
#endif
- /* ensure frame alignment after PADH is added */
- int padh_align = X264_MAX( align - PADH * SIZEOF_PIXEL, 0 ) / SIZEOF_PIXEL;
-
CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
PREALLOC_INIT
/* allocate frame data (+64 for extra data for me) */
i_width = h->mb.i_mb_width*16;
i_lines = h->mb.i_mb_height*16;
- i_stride = align_stride( i_width + 2*PADH, align, disalign );
+ i_stride = align_stride( i_width + PADH2, align, disalign );
if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
{
@@ -123,7 +122,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
frame->i_csp = i_csp;
frame->i_width_lowres = frame->i_width[0]/2;
frame->i_lines_lowres = frame->i_lines[0]/2;
- frame->i_stride_lowres = align_stride( frame->i_width_lowres + 2*PADH, align, disalign<<1 );
+ frame->i_stride_lowres = align_stride( frame->i_width_lowres + PADH2, align, disalign<<1 );
for( int i = 0; i < h->param.i_bframe + 2; i++ )
for( int j = 0; j < h->param.i_bframe + 2; j++ )
@@ -152,9 +151,9 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
- PREALLOC( frame->buffer[1], (chroma_plane_size + padh_align) * SIZEOF_PIXEL );
+ PREALLOC( frame->buffer[1], chroma_plane_size * SIZEOF_PIXEL );
if( PARAM_INTERLACED )
- PREALLOC( frame->buffer_fld[1], (chroma_plane_size + padh_align) * SIZEOF_PIXEL );
+ PREALLOC( frame->buffer_fld[1], chroma_plane_size * SIZEOF_PIXEL );
}
/* all 4 luma planes allocated together, since the cacheline split code
@@ -167,9 +166,9 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
luma_plane_size *= 4;
/* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
- PREALLOC( frame->buffer[p], (luma_plane_size + padh_align) * SIZEOF_PIXEL );
+ PREALLOC( frame->buffer[p], luma_plane_size * SIZEOF_PIXEL );
if( PARAM_INTERLACED )
- PREALLOC( frame->buffer_fld[p], (luma_plane_size + padh_align) * SIZEOF_PIXEL );
+ PREALLOC( frame->buffer_fld[p], luma_plane_size * SIZEOF_PIXEL );
}
frame->b_duplicate = 0;
@@ -207,7 +206,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
- PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * SIZEOF_PIXEL );
+ PREALLOC( frame->buffer_lowres, 4 * luma_plane_size * SIZEOF_PIXEL );
for( int j = 0; j <= !!h->param.i_bframe; j++ )
for( int i = 0; i <= h->param.i_bframe; i++ )
@@ -237,9 +236,9 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
{
int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
- frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH + padh_align;
+ frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH_ALIGN;
if( PARAM_INTERLACED )
- frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH + padh_align;
+ frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH_ALIGN;
}
for( int p = 0; p < luma_plane_count; p++ )
@@ -249,18 +248,18 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
for( int i = 0; i < 4; i++ )
{
- frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align;
+ frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH_ALIGN;
if( PARAM_INTERLACED )
- frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align;
+ frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH_ALIGN;
}
frame->plane[p] = frame->filtered[p][0];
frame->plane_fld[p] = frame->filtered_fld[p][0];
}
else
{
- frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH + padh_align;
+ frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH_ALIGN;
if( PARAM_INTERLACED )
- frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH + padh_align;
+ frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH_ALIGN;
}
}
@@ -270,7 +269,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
frame->mv16x16++;
if( h->param.analyse.i_me_method >= X264_ME_ESA )
- frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
+ frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH_ALIGN;
}
else
{
@@ -278,7 +277,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
for( int i = 0; i < 4; i++ )
- frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH + padh_align + i * luma_plane_size;
+ frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH_ALIGN + i * luma_plane_size;
for( int j = 0; j <= !!h->param.i_bframe; j++ )
for( int i = 0; i <= h->param.i_bframe; i++ )
=====================================
common/frame.h
=====================================
@@ -31,6 +31,8 @@
/* number of pixels past the edge of the frame, for motion estimation/compensation */
#define PADH 32
#define PADV 32
+#define PADH_ALIGN X264_MAX( PADH, NATIVE_ALIGN / SIZEOF_PIXEL )
+#define PADH2 (PADH_ALIGN + PADH)
typedef struct x264_frame
{
=====================================
common/mc.c
=====================================
@@ -749,15 +749,15 @@ void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
int stride = frame->i_stride[0];
if( start < 0 )
{
- memset( frame->integral - PADV * stride - PADH, 0, stride * sizeof(uint16_t) );
+ memset( frame->integral - PADV * stride - PADH_ALIGN, 0, stride * sizeof(uint16_t) );
start = -PADV;
}
if( b_end )
height += PADV-9;
for( int y = start; y < height; y++ )
{
- pixel *pix = frame->plane[0] + y * stride - PADH;
- uint16_t *sum8 = frame->integral + (y+1) * stride - PADH;
+ pixel *pix = frame->plane[0] + y * stride - PADH_ALIGN;
+ uint16_t *sum8 = frame->integral + (y+1) * stride - PADH_ALIGN;
uint16_t *sum4;
if( h->frames.b_have_sub8x8_esa )
{
=====================================
encoder/analyse.c
=====================================
@@ -223,10 +223,10 @@ void x264_analyse_weight_frame( x264_t *h, int end )
if( h->sh.weight[j][0].weightfn )
{
x264_frame_t *frame = h->fref[0][j];
- int width = frame->i_width[0] + 2*PADH;
+ int width = frame->i_width[0] + PADH2;
int i_padv = PADV << PARAM_INTERLACED;
int offset, height;
- pixel *src = frame->filtered[0][0] - frame->i_stride[0]*i_padv - PADH;
+ pixel *src = frame->filtered[0][0] - frame->i_stride[0]*i_padv - PADH_ALIGN;
height = X264_MIN( 16 + end + i_padv, h->fref[0][j]->i_lines[0] + i_padv*2 ) - h->fenc->i_lines_weighted;
offset = h->fenc->i_lines_weighted*frame->i_stride[0];
h->fenc->i_lines_weighted += height;
@@ -234,7 +234,7 @@ void x264_analyse_weight_frame( x264_t *h, int end )
for( int k = j; k < h->i_ref[0]; k++ )
if( h->sh.weight[k][0].weightfn )
{
- pixel *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*i_padv - PADH;
+ pixel *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*i_padv - PADH_ALIGN;
x264_weight_scale_plane( h, dst + offset, frame->i_stride[0],
src + offset, frame->i_stride[0],
width, height, &h->sh.weight[k][0] );
=====================================
encoder/encoder.c
=====================================
@@ -2185,14 +2185,14 @@ static void weighted_pred_init( x264_t *h )
assert( h->sh.weight[j][i].i_denom == denom );
if( !i )
{
- h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] + h->fenc->i_stride[0] * i_padv + PADH;
+ h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] + h->fenc->i_stride[0] * i_padv + PADH_ALIGN;
//scale full resolution frame
if( h->param.i_threads == 1 )
{
- pixel *src = h->fref[0][j]->filtered[0][0] - h->fref[0][j]->i_stride[0]*i_padv - PADH;
- pixel *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
+ pixel *src = h->fref[0][j]->filtered[0][0] - h->fref[0][j]->i_stride[0]*i_padv - PADH_ALIGN;
+ pixel *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH_ALIGN;
int stride = h->fenc->i_stride[0];
- int width = h->fenc->i_width[0] + PADH*2;
+ int width = h->fenc->i_width[0] + PADH2;
int height = h->fenc->i_lines[0] + i_padv*2;
x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
h->fenc->i_lines_weighted = height;
=====================================
encoder/slicetype.c
=====================================
@@ -112,7 +112,6 @@ static NOINLINE void weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x26
{
int ref0_distance = fenc->i_frame - ref->i_frame - 1;
int i_stride = fenc->i_stride[1];
- int i_offset = i_stride / 2;
int i_lines = fenc->i_lines[1];
int i_width = fenc->i_width[1];
int v_shift = CHROMA_V_SHIFT;
@@ -136,7 +135,7 @@ static NOINLINE void weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x26
}
else
h->mc.plane_copy_deinterleave( dstu, i_stride, dstv, i_stride, ref->plane[1], i_stride, cw, ch );
- h->mc.plane_copy_deinterleave( dstu+i_offset, i_stride, dstv+i_offset, i_stride, fenc->plane[1], i_stride, cw, ch );
+ h->mc.plane_copy_deinterleave( dstu+i_width, i_stride, dstv+i_width, i_stride, fenc->plane[1], i_stride, cw, ch );
x264_emms();
}
@@ -228,7 +227,7 @@ static NOINLINE unsigned int weight_cost_chroma( x264_t *h, x264_frame_t *fenc,
int i_stride = fenc->i_stride[1];
int i_lines = fenc->i_lines[1];
int i_width = fenc->i_width[1];
- pixel *src = ref + (i_stride >> 1);
+ pixel *src = ref + i_width;
ALIGNED_ARRAY_16( pixel, buf, [8*16] );
int pixoff = 0;
int height = 16 >> CHROMA_V_SHIFT;
@@ -493,11 +492,11 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
//scale lowres in lookahead for slicetype_frame_cost
pixel *src = ref->buffer_lowres;
pixel *dst = h->mb.p_weight_buf[0];
- int width = ref->i_width_lowres + PADH*2;
+ int width = ref->i_width_lowres + PADH2;
int height = ref->i_lines_lowres + PADV*2;
x264_weight_scale_plane( h, dst, ref->i_stride_lowres, src, ref->i_stride_lowres,
width, height, &weights[0] );
- fenc->weighted[0] = h->mb.p_weight_buf[0] + PADH + ref->i_stride_lowres * PADV;
+ fenc->weighted[0] = h->mb.p_weight_buf[0] + PADH_ALIGN + ref->i_stride_lowres * PADV;
}
}
View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/35417dcd65a57321fbadf98c9a4cff1cb741db4b...b3aadb76329d3c2aedac85142441476bbe5f002c
--
View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/35417dcd65a57321fbadf98c9a4cff1cb741db4b...b3aadb76329d3c2aedac85142441476bbe5f002c
You're receiving this email because of your account on code.videolan.org.
More information about the x264-devel mailing list