[x264-devel] [Git][videolan/x264][stable] 10 commits: Makefile: Drop the -T argument to install
Anton Mitrofanov
gitlab at videolan.org
Mon Apr 12 20:31:50 UTC 2021
Anton Mitrofanov pushed to branch stable at VideoLAN / x264
Commits:
35417dcd by Henrik Gramner at 2021-01-26T02:21:16+01:00
Makefile: Drop the -T argument to install
It's not required, and BSD doesn't support it.
- - - - -
e32bff16 by Anton Mitrofanov at 2021-01-26T21:49:17+03:00
Fix alignment of chroma buffer for weightp
In 10-bit mode pixel_asd8 expects 16-byte alignment for pix1 and pix2.
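A minimal illustration of the requirement, assuming 10-bit pixels are stored as uint16_t (the helper and parameter names below are hypothetical, not x264's):

#include <stdint.h>

typedef uint16_t pixel;   /* assumed 10-bit pixel storage */

/* Hypothetical check, not x264 code: with 16-bit pixel storage, adding a
 * pixel offset to a 16-byte-aligned base pointer preserves the alignment
 * an asd8-style kernel needs only if the offset is a multiple of 8. */
static int offset_keeps_16byte_alignment( const pixel *aligned_base, int offset_in_pixels )
{
    const pixel *p = aligned_base + offset_in_pixels;
    return ((uintptr_t)p & 15) == 0;   /* true iff offset_in_pixels % 8 == 0 */
}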
- - - - -
b3aadb76 by Anton Mitrofanov at 2021-01-26T21:49:17+03:00
Fix PADH alignment
Make pointers to padded buffers aligned both before and after padding.
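A rough sketch of the resulting layout (constants assume an 8-bit build and a hypothetical 64-byte NATIVE_ALIGN; the PADH_ALIGN/PADH2 macros themselves are added to common/frame.h in the diff further down):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#define PADH          32                          /* pixels of MC padding     */
#define NATIVE_ALIGN  64                          /* assumed buffer alignment */
#define SIZEOF_PIXEL  1                           /* 8-bit build              */
#define X264_MAX(a,b) ((a) > (b) ? (a) : (b))
#define PADH_ALIGN    X264_MAX( PADH, NATIVE_ALIGN / SIZEOF_PIXEL )
#define PADH2         (PADH_ALIGN + PADH)         /* left + right padding     */

int main( void )
{
    int width  = 1920;
    /* Round the stride up so every row starts at a NATIVE_ALIGN boundary. */
    int stride = (width + PADH2 + NATIVE_ALIGN - 1) & ~(NATIVE_ALIGN - 1);
    uint8_t *buffer = aligned_alloc( NATIVE_ALIGN, (size_t)stride * 8 );
    if( !buffer )
        return 1;
    uint8_t *plane = buffer + PADH_ALIGN;         /* first real pixel of row 0 */
    /* Both the padded start (plane - PADH_ALIGN == buffer) and the plane
     * pointer itself are NATIVE_ALIGN-aligned. */
    assert( ((uintptr_t)plane & (NATIVE_ALIGN - 1)) == 0 );
    free( buffer );
    return 0;
}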
- - - - -
59c06095 by Anton Mitrofanov at 2021-01-27T21:25:48+03:00
x86inc: Fix LOAD_MM_PERMUTATION for AVX-512
- - - - -
a7e2c6ab by Anton Mitrofanov at 2021-02-01T22:32:37+03:00
Fix VBV overflow check for B-frames
- - - - -
ee62f14d by Anton Mitrofanov at 2021-02-09T00:25:32+03:00
CI: Update macos URL to vlc-contrib
- - - - -
fa264466 by Anton Mitrofanov at 2021-02-10T21:58:32+01:00
Fix MB stats
Bug report by Zhengzhi Duan.
- - - - -
38a76c7a by Henrik Gramner at 2021-02-11T14:24:57+01:00
Silence false positive -Wformat-truncation warning
- - - - -
b86ae3c6 by Henrik Gramner at 2021-02-11T15:09:43+01:00
x86inc: Add stack probing on Windows
Large stack allocations on Windows need to use stack probing in order
to guarantee that all stack memory is committed before accessing it.
This is done by ensuring that the guard page(s) at the end of the
currently committed pages are touched prior to any pages beyond that.
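A rough C model of the idea (the actual asm macro, PROBE_STACK, is in the x86inc.asm hunk below; the constant and the simulated stack pointer here are illustrative only):

#include <stddef.h>
#include <stdint.h>

#define STACK_PROBE_SIZE 8192   /* matches the Win64 value used below */

/* Before a large stack allocation, read one word every STACK_PROBE_SIZE
 * bytes below the current stack pointer, from the committed region outward,
 * so each access can fault in at most the next guard page. */
static void probe_stack( const volatile uint8_t *stack_ptr, size_t alloc_size )
{
    for( size_t i = STACK_PROBE_SIZE; i <= alloc_size; i += STACK_PROBE_SIZE )
        (void)stack_ptr[-(ptrdiff_t)i];   /* touch and discard */
}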
- - - - -
55d517bc by Martin Storsjö at 2021-04-12T09:54:56+03:00
aarch64: Fix the zigzag_interleave_8x8_cavlc_neon function
Use 'cmhs' (which does an unsigned greater-than-or-equal comparison)
instead of 'cmhi' (which does an unsigned greater-than comparison).
This makes sure that DCT coefficients with a magnitude of 1 are
recognized in the output nnz buffer.
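In effect (a minimal C model of the described comparison, not the NEON code itself):

#include <stdint.h>

/* The nnz flag must be set whenever the largest |coeff| is >= 1, so a
 * strict greater-than test against 1 (cmhi) drops blocks whose only
 * nonzero coefficients are +/-1, while greater-or-equal (cmhs) keeps them. */
static uint8_t nnz_flag( uint32_t max_abs_coeff )
{
    return max_abs_coeff >= 1;   /* cmhi would compute max_abs_coeff > 1 */
}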
- - - - -
11 changed files:
- .gitlab-ci.yml
- Makefile
- common/aarch64/dct-a.S
- common/frame.c
- common/frame.h
- common/mc.c
- common/x86/x86inc.asm
- encoder/analyse.c
- encoder/encoder.c
- encoder/ratecontrol.c
- encoder/slicetype.c
Changes:
=====================================
.gitlab-ci.yml
=====================================
@@ -39,7 +39,7 @@ stages:
_TRIPLET: "x86_64-apple-darwin19"
_PLATFORMSUFFIX: ""
_WRAPPER: ""
- _CONTRIB_URL: "https://artifacts.videolan.org/vlc/macos/"
+ _CONTRIB_URL: "https://artifacts.videolan.org/vlc/macos-x86_64/"
.build:
stage: build
=====================================
Makefile
=====================================
@@ -414,7 +414,7 @@ endif
install-bashcompletion:
ifneq ($(BASHCOMPLETIONSDIR),)
$(INSTALL) -d $(DESTDIR)$(BASHCOMPLETIONSDIR)
- $(INSTALL) -m 644 -T $(SRCPATH)/tools/bash-autocomplete.sh $(DESTDIR)$(BASHCOMPLETIONSDIR)/x264
+ $(INSTALL) -m 644 $(SRCPATH)/tools/bash-autocomplete.sh $(DESTDIR)$(BASHCOMPLETIONSDIR)/x264
endif
uninstall:
=====================================
common/aarch64/dct-a.S
=====================================
@@ -707,7 +707,7 @@ function zigzag_interleave_8x8_cavlc_neon, export=1
umaxp v16.8h, v16.8h, v18.8h
st1 {v2.8h}, [x0], #16
st1 {v6.8h}, [x0], #16
- cmhi v16.4s, v16.4s, v31.4s
+ cmhs v16.4s, v16.4s, v31.4s
st1 {v3.8h}, [x0], #16
and v16.16b, v16.16b, v31.16b
st1 {v7.8h}, [x0], #16
=====================================
common/frame.c
=====================================
@@ -38,7 +38,7 @@ static int align_stride( int x, int align, int disalign )
static int align_plane_size( int x, int disalign )
{
if( !(x&(disalign-1)) )
- x += 128;
+ x += X264_MAX( 128, NATIVE_ALIGN ) / SIZEOF_PIXEL;
return x;
}
@@ -63,29 +63,28 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
int i_mb_count = h->mb.i_mb_count;
int i_stride, i_width, i_lines, luma_plane_count;
int i_padv = PADV << PARAM_INTERLACED;
- int align = 16;
+ int align = NATIVE_ALIGN / SIZEOF_PIXEL;
#if ARCH_X86 || ARCH_X86_64
if( h->param.cpu&X264_CPU_CACHELINE_64 || h->param.cpu&X264_CPU_AVX512 )
- align = 64;
+ align = 64 / SIZEOF_PIXEL;
else if( h->param.cpu&X264_CPU_CACHELINE_32 || h->param.cpu&X264_CPU_AVX )
- align = 32;
+ align = 32 / SIZEOF_PIXEL;
+ else
+ align = 16 / SIZEOF_PIXEL;
#endif
#if ARCH_PPC
- int disalign = 1<<9;
+ int disalign = (1<<9) / SIZEOF_PIXEL;
#else
- int disalign = 1<<10;
+ int disalign = (1<<10) / SIZEOF_PIXEL;
#endif
- /* ensure frame alignment after PADH is added */
- int padh_align = X264_MAX( align - PADH * SIZEOF_PIXEL, 0 ) / SIZEOF_PIXEL;
-
CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
PREALLOC_INIT
/* allocate frame data (+64 for extra data for me) */
i_width = h->mb.i_mb_width*16;
i_lines = h->mb.i_mb_height*16;
- i_stride = align_stride( i_width + 2*PADH, align, disalign );
+ i_stride = align_stride( i_width + PADH2, align, disalign );
if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
{
@@ -123,7 +122,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
frame->i_csp = i_csp;
frame->i_width_lowres = frame->i_width[0]/2;
frame->i_lines_lowres = frame->i_lines[0]/2;
- frame->i_stride_lowres = align_stride( frame->i_width_lowres + 2*PADH, align, disalign<<1 );
+ frame->i_stride_lowres = align_stride( frame->i_width_lowres + PADH2, align, disalign<<1 );
for( int i = 0; i < h->param.i_bframe + 2; i++ )
for( int j = 0; j < h->param.i_bframe + 2; j++ )
@@ -152,9 +151,9 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
- PREALLOC( frame->buffer[1], (chroma_plane_size + padh_align) * SIZEOF_PIXEL );
+ PREALLOC( frame->buffer[1], chroma_plane_size * SIZEOF_PIXEL );
if( PARAM_INTERLACED )
- PREALLOC( frame->buffer_fld[1], (chroma_plane_size + padh_align) * SIZEOF_PIXEL );
+ PREALLOC( frame->buffer_fld[1], chroma_plane_size * SIZEOF_PIXEL );
}
/* all 4 luma planes allocated together, since the cacheline split code
@@ -167,9 +166,9 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
luma_plane_size *= 4;
/* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
- PREALLOC( frame->buffer[p], (luma_plane_size + padh_align) * SIZEOF_PIXEL );
+ PREALLOC( frame->buffer[p], luma_plane_size * SIZEOF_PIXEL );
if( PARAM_INTERLACED )
- PREALLOC( frame->buffer_fld[p], (luma_plane_size + padh_align) * SIZEOF_PIXEL );
+ PREALLOC( frame->buffer_fld[p], luma_plane_size * SIZEOF_PIXEL );
}
frame->b_duplicate = 0;
@@ -207,7 +206,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
- PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * SIZEOF_PIXEL );
+ PREALLOC( frame->buffer_lowres, 4 * luma_plane_size * SIZEOF_PIXEL );
for( int j = 0; j <= !!h->param.i_bframe; j++ )
for( int i = 0; i <= h->param.i_bframe; i++ )
@@ -237,9 +236,9 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
{
int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
- frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH + padh_align;
+ frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH_ALIGN;
if( PARAM_INTERLACED )
- frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH + padh_align;
+ frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH_ALIGN;
}
for( int p = 0; p < luma_plane_count; p++ )
@@ -249,18 +248,18 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
for( int i = 0; i < 4; i++ )
{
- frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align;
+ frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH_ALIGN;
if( PARAM_INTERLACED )
- frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align;
+ frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH_ALIGN;
}
frame->plane[p] = frame->filtered[p][0];
frame->plane_fld[p] = frame->filtered_fld[p][0];
}
else
{
- frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH + padh_align;
+ frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH_ALIGN;
if( PARAM_INTERLACED )
- frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH + padh_align;
+ frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH_ALIGN;
}
}
@@ -270,7 +269,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
frame->mv16x16++;
if( h->param.analyse.i_me_method >= X264_ME_ESA )
- frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
+ frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH_ALIGN;
}
else
{
@@ -278,7 +277,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
for( int i = 0; i < 4; i++ )
- frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH + padh_align + i * luma_plane_size;
+ frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH_ALIGN + i * luma_plane_size;
for( int j = 0; j <= !!h->param.i_bframe; j++ )
for( int i = 0; i <= h->param.i_bframe; i++ )
=====================================
common/frame.h
=====================================
@@ -31,6 +31,8 @@
/* number of pixels past the edge of the frame, for motion estimation/compensation */
#define PADH 32
#define PADV 32
+#define PADH_ALIGN X264_MAX( PADH, NATIVE_ALIGN / SIZEOF_PIXEL )
+#define PADH2 (PADH_ALIGN + PADH)
typedef struct x264_frame
{
=====================================
common/mc.c
=====================================
@@ -749,15 +749,15 @@ void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
int stride = frame->i_stride[0];
if( start < 0 )
{
- memset( frame->integral - PADV * stride - PADH, 0, stride * sizeof(uint16_t) );
+ memset( frame->integral - PADV * stride - PADH_ALIGN, 0, stride * sizeof(uint16_t) );
start = -PADV;
}
if( b_end )
height += PADV-9;
for( int y = start; y < height; y++ )
{
- pixel *pix = frame->plane[0] + y * stride - PADH;
- uint16_t *sum8 = frame->integral + (y+1) * stride - PADH;
+ pixel *pix = frame->plane[0] + y * stride - PADH_ALIGN;
+ uint16_t *sum8 = frame->integral + (y+1) * stride - PADH_ALIGN;
uint16_t *sum4;
if( h->frames.b_have_sub8x8_esa )
{
=====================================
common/x86/x86inc.asm
=====================================
@@ -356,6 +356,28 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%define vzeroupper_required (mmsize > 16 && (ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512)))
%define high_mm_regs (16*cpuflag(avx512))
+; Large stack allocations on Windows need to use stack probing in order
+; to guarantee that all stack memory is committed before accessing it.
+; This is done by ensuring that the guard page(s) at the end of the
+; currently committed pages are touched prior to any pages beyond that.
+%if WIN64
+ %assign STACK_PROBE_SIZE 8192
+%elifidn __OUTPUT_FORMAT__, win32
+ %assign STACK_PROBE_SIZE 4096
+%else
+ %assign STACK_PROBE_SIZE 0
+%endif
+
+%macro PROBE_STACK 1 ; stack_size
+ %if STACK_PROBE_SIZE
+ %assign %%i STACK_PROBE_SIZE
+ %rep %1 / STACK_PROBE_SIZE
+ mov eax, [rsp-%%i]
+ %assign %%i %%i+STACK_PROBE_SIZE
+ %endrep
+ %endif
+%endmacro
+
%macro ALLOC_STACK 0-2 0, 0 ; stack_size, n_xmm_regs (for win64 only)
%ifnum %1
%if %1 != 0
@@ -376,6 +398,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%if required_stack_alignment <= STACK_ALIGNMENT
; maintain the current stack alignment
%assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
+ PROBE_STACK stack_size_padded
SUB rsp, stack_size_padded
%else
%assign %%reg_num (regs_used - 1)
@@ -391,6 +414,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%xdefine rstkm rstk
%endif
%assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
+ PROBE_STACK stack_size_padded
mov rstk, rsp
and rsp, ~(required_stack_alignment-1)
sub rsp, stack_size_padded
@@ -1139,7 +1163,7 @@ INIT_XMM
%endif
%xdefine %%tmp %%f %+ 0
%ifnum %%tmp
- RESET_MM_PERMUTATION
+ DEFINE_MMREGS mmtype
%assign %%i 0
%rep num_mmregs
%xdefine %%tmp %%f %+ %%i
=====================================
encoder/analyse.c
=====================================
@@ -223,10 +223,10 @@ void x264_analyse_weight_frame( x264_t *h, int end )
if( h->sh.weight[j][0].weightfn )
{
x264_frame_t *frame = h->fref[0][j];
- int width = frame->i_width[0] + 2*PADH;
+ int width = frame->i_width[0] + PADH2;
int i_padv = PADV << PARAM_INTERLACED;
int offset, height;
- pixel *src = frame->filtered[0][0] - frame->i_stride[0]*i_padv - PADH;
+ pixel *src = frame->filtered[0][0] - frame->i_stride[0]*i_padv - PADH_ALIGN;
height = X264_MIN( 16 + end + i_padv, h->fref[0][j]->i_lines[0] + i_padv*2 ) - h->fenc->i_lines_weighted;
offset = h->fenc->i_lines_weighted*frame->i_stride[0];
h->fenc->i_lines_weighted += height;
@@ -234,7 +234,7 @@ void x264_analyse_weight_frame( x264_t *h, int end )
for( int k = j; k < h->i_ref[0]; k++ )
if( h->sh.weight[k][0].weightfn )
{
- pixel *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*i_padv - PADH;
+ pixel *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*i_padv - PADH_ALIGN;
x264_weight_scale_plane( h, dst + offset, frame->i_stride[0],
src + offset, frame->i_stride[0],
width, height, &h->sh.weight[k][0] );
@@ -450,7 +450,7 @@ static void mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
IS_INTRA( h->mb.i_mb_type_topleft ) ||
IS_INTRA( h->mb.i_mb_type_topright ) ||
(h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref[0][0]->mb_type[h->mb.i_mb_xy] )) ||
- (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) )
+ (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16] + h->stat.frame.i_mb_count[I_PCM])) )
{ /* intra is likely */ }
else
{
=====================================
encoder/encoder.c
=====================================
@@ -1787,11 +1787,12 @@ x264_t *x264_encoder_open( x264_param_t *param, void *api )
h->sps->i_profile_idc == PROFILE_HIGH422 ?
(h->sps->b_constraint_set3 ? "High 4:2:2 Intra" : "High 4:2:2") :
h->sps->b_constraint_set3 ? "High 4:4:4 Intra" : "High 4:4:4 Predictive";
- char level[4];
- snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
+ char level[16];
if( h->sps->i_level_idc == 9 || ( h->sps->i_level_idc == 11 && h->sps->b_constraint_set3 &&
(h->sps->i_profile_idc == PROFILE_BASELINE || h->sps->i_profile_idc == PROFILE_MAIN) ) )
strcpy( level, "1b" );
+ else
+ snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc / 10, h->sps->i_level_idc % 10 );
static const char * const subsampling[4] = { "4:0:0", "4:2:0", "4:2:2", "4:4:4" };
x264_log( h, X264_LOG_INFO, "profile %s, level %s, %s, %d-bit\n",
@@ -2185,14 +2186,14 @@ static void weighted_pred_init( x264_t *h )
assert( h->sh.weight[j][i].i_denom == denom );
if( !i )
{
- h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] + h->fenc->i_stride[0] * i_padv + PADH;
+ h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] + h->fenc->i_stride[0] * i_padv + PADH_ALIGN;
//scale full resolution frame
if( h->param.i_threads == 1 )
{
- pixel *src = h->fref[0][j]->filtered[0][0] - h->fref[0][j]->i_stride[0]*i_padv - PADH;
- pixel *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
+ pixel *src = h->fref[0][j]->filtered[0][0] - h->fref[0][j]->i_stride[0]*i_padv - PADH_ALIGN;
+ pixel *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH_ALIGN;
int stride = h->fenc->i_stride[0];
- int width = h->fenc->i_width[0] + PADH*2;
+ int width = h->fenc->i_width[0] + PADH2;
int height = h->fenc->i_lines[0] + i_padv*2;
x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
h->fenc->i_lines_weighted = height;
@@ -4275,14 +4276,14 @@ void x264_encoder_close ( x264_t *h )
int64_t i_i8x8 = SUM3b( h->stat.i_mb_count, I_8x8 );
int64_t i_intra = i_i8x8 + SUM3b( h->stat.i_mb_count, I_4x4 )
+ SUM3b( h->stat.i_mb_count, I_16x16 );
- int64_t i_all_intra = i_intra + SUM3b( h->stat.i_mb_count, I_PCM);
+ int64_t i_all_intra = i_intra + SUM3b( h->stat.i_mb_count, I_PCM );
int64_t i_skip = SUM3b( h->stat.i_mb_count, P_SKIP )
+ SUM3b( h->stat.i_mb_count, B_SKIP );
const int i_count = h->stat.i_frame_count[SLICE_TYPE_I] +
h->stat.i_frame_count[SLICE_TYPE_P] +
h->stat.i_frame_count[SLICE_TYPE_B];
int64_t i_mb_count = (int64_t)i_count * h->mb.i_mb_count;
- int64_t i_inter = i_mb_count - i_skip - i_intra;
+ int64_t i_inter = i_mb_count - i_skip - i_all_intra;
const double duration = h->stat.f_frame_duration[SLICE_TYPE_I] +
h->stat.f_frame_duration[SLICE_TYPE_P] +
h->stat.f_frame_duration[SLICE_TYPE_B];
@@ -4297,7 +4298,7 @@ void x264_encoder_close ( x264_t *h )
if( i_skip )
fieldstats += sprintf( fieldstats, " skip:%.1f%%", h->stat.i_mb_field[2] * 100.0 / i_skip );
x264_log( h, X264_LOG_INFO, "field mbs: intra: %.1f%%%s\n",
- h->stat.i_mb_field[0] * 100.0 / i_intra, buf );
+ h->stat.i_mb_field[0] * 100.0 / i_all_intra, buf );
}
if( h->pps->b_transform_8x8_mode )
@@ -4305,7 +4306,7 @@ void x264_encoder_close ( x264_t *h )
buf[0] = 0;
if( h->stat.i_mb_count_8x8dct[0] )
sprintf( buf, " inter:%.1f%%", 100. * h->stat.i_mb_count_8x8dct[1] / h->stat.i_mb_count_8x8dct[0] );
- x264_log( h, X264_LOG_INFO, "8x8 transform intra:%.1f%%%s\n", 100. * i_i8x8 / i_intra, buf );
+ x264_log( h, X264_LOG_INFO, "8x8 transform intra:%.1f%%%s\n", 100. * i_i8x8 / X264_MAX( i_intra, 1 ), buf );
}
if( (h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO ||
=====================================
encoder/ratecontrol.c
=====================================
@@ -1829,9 +1829,9 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
x264_emms();
h->stat.frame.i_mb_count_skip = mbs[P_SKIP] + mbs[B_SKIP];
- h->stat.frame.i_mb_count_i = mbs[I_16x16] + mbs[I_8x8] + mbs[I_4x4];
+ h->stat.frame.i_mb_count_i = mbs[I_16x16] + mbs[I_8x8] + mbs[I_4x4] + mbs[I_PCM];
h->stat.frame.i_mb_count_p = mbs[P_L0] + mbs[P_8x8];
- for( int i = B_DIRECT; i < B_8x8; i++ )
+ for( int i = B_DIRECT; i <= B_8x8; i++ )
h->stat.frame.i_mb_count_p += mbs[i];
h->fdec->f_qp_avg_rc = rc->qpa_rc /= h->mb.i_mb_count;
@@ -2350,7 +2350,10 @@ static double clip_qscale( x264_t *h, int pict_type, double q )
bframe_cpb_duration += h->fenc->f_planned_cpb_duration[i];
if( bbits * nb > bframe_cpb_duration * rcc->vbv_max_rate )
+ {
nb = 0;
+ bframe_cpb_duration = 0;
+ }
pbbits += nb * bbits;
minigop_cpb_duration = bframe_cpb_duration + fenc_cpb_duration;
=====================================
encoder/slicetype.c
=====================================
@@ -112,7 +112,6 @@ static NOINLINE void weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x26
{
int ref0_distance = fenc->i_frame - ref->i_frame - 1;
int i_stride = fenc->i_stride[1];
- int i_offset = i_stride / 2;
int i_lines = fenc->i_lines[1];
int i_width = fenc->i_width[1];
int v_shift = CHROMA_V_SHIFT;
@@ -136,7 +135,7 @@ static NOINLINE void weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x26
}
else
h->mc.plane_copy_deinterleave( dstu, i_stride, dstv, i_stride, ref->plane[1], i_stride, cw, ch );
- h->mc.plane_copy_deinterleave( dstu+i_offset, i_stride, dstv+i_offset, i_stride, fenc->plane[1], i_stride, cw, ch );
+ h->mc.plane_copy_deinterleave( dstu+i_width, i_stride, dstv+i_width, i_stride, fenc->plane[1], i_stride, cw, ch );
x264_emms();
}
@@ -228,7 +227,7 @@ static NOINLINE unsigned int weight_cost_chroma( x264_t *h, x264_frame_t *fenc,
int i_stride = fenc->i_stride[1];
int i_lines = fenc->i_lines[1];
int i_width = fenc->i_width[1];
- pixel *src = ref + (i_stride >> 1);
+ pixel *src = ref + i_width;
ALIGNED_ARRAY_16( pixel, buf, [8*16] );
int pixoff = 0;
int height = 16 >> CHROMA_V_SHIFT;
@@ -493,11 +492,11 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
//scale lowres in lookahead for slicetype_frame_cost
pixel *src = ref->buffer_lowres;
pixel *dst = h->mb.p_weight_buf[0];
- int width = ref->i_width_lowres + PADH*2;
+ int width = ref->i_width_lowres + PADH2;
int height = ref->i_lines_lowres + PADV*2;
x264_weight_scale_plane( h, dst, ref->i_stride_lowres, src, ref->i_stride_lowres,
width, height, &weights[0] );
- fenc->weighted[0] = h->mb.p_weight_buf[0] + PADH + ref->i_stride_lowres * PADV;
+ fenc->weighted[0] = h->mb.p_weight_buf[0] + PADH_ALIGN + ref->i_stride_lowres * PADV;
}
}
View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/544c61f082194728d0391fb280a6e138ba320a96...55d517bc4569272a2c9a367a4106c234aba2ffbc
--
You're receiving this email because of your account on code.videolan.org.