[x264-devel] [PATCH] Added support for CABAC zero bytes insertion
Jay N. Shingala
jay.shingala at ittiam.com
Fri Apr 12 13:23:47 CEST 2019
Dear x264 developers,
This is a follow-up on the patch submitted (quite a while ago) for CABAC zero word insertion, which is a requirement for bitstream conformance.
For reference, here is an excerpt of section 7.4.2.10 of the AVC/H.264 specification describing the need for zero word insertion when the ratio of the CABAC bin count to the bit count is higher than the constrained limit.
"cabac_zero_word is a byte-aligned sequence of two bytes equal to 0x0000.
Let NumBytesInVclNALunits be the sum of the values of NumBytesInNALunit for all VCL NAL units of a coded picture.
Let BinCountsInNALunits be the number of times that the parsing process function DecodeBin( ), specified in
clause 9.3.3.2, is invoked to decode the contents of all VCL NAL units of a coded picture. When
entropy_coding_mode_flag is equal to 1, it is a requirement of bitstream conformance that BinCountsInNALunits shall
not exceed ( 32 ÷ 3 ) * NumBytesInVclNALunits + ( RawMbBits * PicSizeInMbs ) ÷ 32.
NOTE – The constraint on the maximum number of bins resulting from decoding the contents of the slice layer NAL units can be
met by inserting a number of cabac_zero_word syntax elements to increase the value of NumBytesInVclNALunits. Each
cabac_zero_word is represented in a NAL unit by the three-byte sequence 0x000003 (as a result of the constraints on NAL unit
contents that result in requiring inclusion of an emulation_prevention_three_byte for each cabac_zero_word)."
This patch will be useful for strict bit stream conformance in x264.
It is important to note that the overall performance impact was negligible, because the latency of incrementing "i_bin_cnt" in cabac_encode_decision() and cabac_encode_bypass() is well hidden.
Could you please provide comments on the conformance requirement and the suitability of this patch for x264?
Thank you,
Jay
-----Original Message-----
From: Jay N. Shingala
Sent: Friday, October 26, 2018 4:50 PM
To: x264-devel at videolan.org
Subject: [x264-devel] [PATCH] Added support for CABAC zero bytes insertion
From 658dc3f6af9fb2dd4d776b4afb70dae1110fd5d9 Mon Sep 17 00:00:00 2001
From: "Jay N. Shingala" <100153 at ittiam.com>
Date: Tue, 11 Sep 2018 21:03:40 +0530
Subject: [PATCH] Added support for cabac zero bytes insertion
The following changes implement zero byte insertion to meet the constraints of clause 7.4.2.10 of the specification.
- Added "i_bin_cnt" to "x264_cabac_t" for counting total bins
i_bin_cnt is unconditionally incremented in all cabac decision and bypass mode modules (both C and ASM)
- Added "i_bin_cnt" to "x264_frame_stat_t" as part of frame stats
It is assigned from the cabac structure to the frame structure at the end of a slice
It is recommended not to move i_bin_cnt within this structure. Its current position ensures that the bin counter is accumulated across multiple slices at the end of the frame (in threaded_slices_write()).
- Added macro "INSERT_CABAC_ZERO_WORD" to insert cabac zero bytes
"encoder_insert_cabaczerowords()" is the new function which adds cabac zero bytes as per clause 7.4.2.10 at the end of the frame
---
common/aarch64/asm-offsets.c | 1 +
common/aarch64/asm-offsets.h | 1 +
common/aarch64/cabac-a.S | 10 +++++++
common/cabac.c | 6 +++++
common/cabac.h | 3 +++
common/common.h | 2 ++
common/x86/cabac-a.asm | 20 ++++++++------
encoder/encoder.c | 62 ++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 97 insertions(+), 8 deletions(-)
diff --git a/common/aarch64/asm-offsets.c b/common/aarch64/asm-offsets.c
index db88e9c4..81bd4f8f 100644
--- a/common/aarch64/asm-offsets.c
+++ b/common/aarch64/asm-offsets.c
@@ -38,5 +38,6 @@ X264_CHECK_OFFSET(x264_cabac_t, i_bytes_outstanding, CABAC_I_BYTES_OUTSTANDING);
X264_CHECK_OFFSET(x264_cabac_t, p_start, CABAC_P_START);
X264_CHECK_OFFSET(x264_cabac_t, p, CABAC_P);
X264_CHECK_OFFSET(x264_cabac_t, p_end, CABAC_P_END);
+X264_CHECK_OFFSET(x264_cabac_t, i_bin_cnt, CABAC_BIN_COUNT);
X264_CHECK_OFFSET(x264_cabac_t, f8_bits_encoded, CABAC_F8_BITS_ENCODED);
X264_CHECK_OFFSET(x264_cabac_t, state, CABAC_STATE);
diff --git a/common/aarch64/asm-offsets.h b/common/aarch64/asm-offsets.h
index 81184de1..c19655ff 100644
--- a/common/aarch64/asm-offsets.h
+++ b/common/aarch64/asm-offsets.h
@@ -33,6 +33,7 @@
#define CABAC_P_START 0x10
#define CABAC_P 0x18
#define CABAC_P_END 0x20
+#define CABAC_BIN_COUNT 0x28
#define CABAC_F8_BITS_ENCODED 0x30
#define CABAC_STATE 0x34
diff --git a/common/aarch64/cabac-a.S b/common/aarch64/cabac-a.S
index 9abb14b2..0346bed3 100644
--- a/common/aarch64/cabac-a.S
+++ b/common/aarch64/cabac-a.S
@@ -28,6 +28,7 @@
// w11 holds x264_cabac_t.i_low
// w12 holds x264_cabac_t.i_range
+// w7 holds x264_cabac_t.i_bin_cnt
function cabac_encode_decision_asm, export=1
movrel x8, X264(cabac_range_lps)
@@ -35,6 +36,7 @@ function cabac_encode_decision_asm, export=1
add w10, w1, #CABAC_STATE
ldrb w3, [x0, x10] // i_state
ldr w12, [x0, #CABAC_I_RANGE]
+ ldr w7, [x0, #CABAC_BIN_COUNT]
and x4, x3, #~1
asr w5, w12, #6
add x8, x8, x4, lsl #1
@@ -42,7 +44,9 @@ function cabac_encode_decision_asm, export=1
eor w6, w2, w3 // b ^ i_state
ldrb w4, [x8, x5] // i_range_lps
ldr w11, [x0, #CABAC_I_LOW]
+adds w7, w7, #1 // i_bin_cnt += 1
sub w12, w12, w4
+str w7, [x0, #CABAC_BIN_COUNT]
tbz w6, #0, 1f // (b ^ i_state) & 1
add w11, w11, w12
mov w12, w4
@@ -103,11 +107,14 @@ endfunc
function cabac_encode_bypass_asm, export=1
ldr w12, [x0, #CABAC_I_RANGE]
+ ldr w7, [x0, #CABAC_BIN_COUNT]
ldr w11, [x0, #CABAC_I_LOW]
ldr w2, [x0, #CABAC_I_QUEUE]
and w1, w1, w12
add w11, w1, w11, lsl #1
+ adds w7, w7, #1 // i_bin_cnt += 1
adds w2, w2, #1
+ str w7, [x0, #CABAC_BIN_COUNT]
b.ge cabac_putbyte
str w11, [x0, #CABAC_I_LOW]
str w2, [x0, #CABAC_I_QUEUE]
@@ -116,7 +123,10 @@ endfunc
function cabac_encode_terminal_asm, export=1
ldr w12, [x0, #CABAC_I_RANGE]
+ ldr w7, [x0, #CABAC_BIN_COUNT]
ldr w11, [x0, #CABAC_I_LOW]
sub w12, w12, #2
+ adds w7, w7, #1 // i_bin_cnt += 1
+ str w7, [x0, #CABAC_BIN_COUNT]
b cabac_encode_renorm
endfunc
diff --git a/common/cabac.c b/common/cabac.c
index 8c3e72af..f2f2b01d 100644
--- a/common/cabac.c
+++ b/common/cabac.c
@@ -56,6 +56,7 @@ void x264_cabac_encode_init_core( x264_cabac_t *cb )
cb->i_range = 0x01FE;
cb->i_queue = -9; // the first bit will be shifted away and not written
cb->i_bytes_outstanding = 0;
+ cb->i_bin_cnt = 0;
}
void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end )
@@ -122,6 +123,7 @@ void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b )
}
cb->state[i_ctx] = x264_cabac_transition[i_state][b];
cabac_encode_renorm( cb );
+ cb->i_bin_cnt++;
}
/* Note: b is negated for this function */
@@ -131,6 +133,7 @@ void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b )
cb->i_low += b & cb->i_range;
cb->i_queue += 1;
cabac_putbyte( cb );
+ cb->i_bin_cnt++;
}
static const int bypass_lut[16] =
@@ -146,6 +149,8 @@ void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val )
uint32_t x = (bypass_lut[k-exp_bits]<<exp_bits) + v;
k = 2*k+1-exp_bits;
int i = ((k-1)&7)+1;
+
+ cb->i_bin_cnt += k;
do {
k -= i;
cb->i_low <<= i;
@@ -160,6 +165,7 @@ void x264_cabac_encode_terminal_c( x264_cabac_t *cb )
{
cb->i_range -= 2;
cabac_encode_renorm( cb );
+ cb->i_bin_cnt++;
}
void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb )
diff --git a/common/cabac.h b/common/cabac.h
index b573416e..a29a8763 100644
--- a/common/cabac.h
+++ b/common/cabac.h
@@ -37,10 +37,13 @@ typedef struct
int i_queue; //stored with an offset of -8 for faster asm
int i_bytes_outstanding;
+
uint8_t *p_start;
uint8_t *p;
uint8_t *p_end;
+ int i_bin_cnt; // bin counter needed for cabac zero word padding of access unit
+
/* aligned for memcpy_aligned starting here */
ALIGNED_64( int f8_bits_encoded ); // only if using x264_cabac_size_decision()
diff --git a/common/common.h b/common/common.h
index 320206ef..febe11c3 100644
--- a/common/common.h
+++ b/common/common.h
@@ -259,6 +259,8 @@ typedef struct
int i_mb_field[3];
/* Adaptive direct mv pred */
int i_direct_score[2];
+ /* bin counter needed for cabac zero word padding of access unit */
+ int i_bin_cnt;
/* Metrics */
int64_t i_ssd[3];
double f_ssim;
diff --git a/common/x86/cabac-a.asm b/common/x86/cabac-a.asm
index fcafd9c4..ae2d905c 100644
--- a/common/x86/cabac-a.asm
+++ b/common/x86/cabac-a.asm
@@ -115,6 +115,7 @@ struc cb
.start: pointer 1
.p: pointer 1
.end: pointer 1
+ .i_bin_cnt: resd 1
align 64, resb 1
.bits_encoded: resd 1
.state: resb 1024
@@ -143,12 +144,13 @@ endstruc
DECLARE_REG_TMP 0,4,2,1,3,5,6,2
%endif
-cglobal cabac_encode_decision_%1, 1,7
+cglobal cabac_encode_decision_%1, 1,7
movifnidn t1d, r1m
mov t5d, [r0+cb.range]
movzx t6d, byte [r0+cb.state+t1]
+ inc dword [r0+cb.i_bin_cnt]
movifnidn t0, r0 ; WIN64
- mov t4d, ~1
+ mov t4d, ~1
mov t3d, t5d
and t4d, t6d
shr t5d, 6
@@ -193,13 +195,14 @@ cglobal cabac_encode_decision_%1, 1,7
mov [t0+cb.queue], t3d
RET
-cglobal cabac_encode_bypass_%1, 2,3
+cglobal cabac_encode_bypass_%1, 2,3
mov t7d, [r0+cb.low]
and r1d, [r0+cb.range]
- lea t7d, [t7*2+r1]
- movifnidn t0, r0 ; WIN64
- mov t3d, [r0+cb.queue]
- inc t3d
+ inc dword [r0+cb.i_bin_cnt]
+ lea t7d, [t7*2+r1]
+ movifnidn t0, r0 ; WIN64
+ mov t3d, [r0+cb.queue]
+ inc t3d
%if ARCH_X86_64 ; .putbyte compiles to nothing but a jmp
jge cabac_putbyte_%1
%else
@@ -217,7 +220,8 @@ cglobal cabac_encode_bypass_%1, 2,3
%ifnidn %1,bmi2
cglobal cabac_encode_terminal_%1, 1,3
- sub dword [r0+cb.range], 2
+ inc dword [r0+cb.i_bin_cnt]
+ sub dword [r0+cb.range], 2
; shortcut: the renormalization shift in terminal
; can only be 0 or 1 and is zero over 99% of the time.
test dword [r0+cb.range], 0x100
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 7316a586..bb3f8d67 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -37,6 +37,7 @@
#endif
//#define DEBUG_MB_TYPE
+#define INSERT_CABAC_ZERO_WORD 1
#define bs_write_ue bs_write_ue_big
@@ -2005,6 +2006,55 @@ static int encoder_encapsulate_nals( x264_t *h, int start )
return nal_buffer - (h0->nal_buffer + previous_nal_size);
}
+#if INSERT_CABAC_ZERO_WORD
+static int encoder_insert_cabaczerowords( x264_t *h, int frame_size ) {
+ int RawMbBits = 256 * h->param.i_bitdepth;
+ int min_num_bytes = 0;
+ int i_chroma_format_idc = h->sps->i_chroma_format_idc;
+
+ if(i_chroma_format_idc == CHROMA_420 )
+ RawMbBits = (RawMbBits * 3) / 2;
+ else if( i_chroma_format_idc == CHROMA_422 )
+ RawMbBits = (RawMbBits * 2);
+ else if( i_chroma_format_idc == CHROMA_444 )
+ RawMbBits = (RawMbBits * 3);
+
+ /* Check for cabac zero word stuffing based on output frame size, bin count and raw bits as per section 7.4.2.10
+ Also see section 9.3.4.6 Byte stuffing process */
+ min_num_bytes = ((96 * h->stat.frame.i_bin_cnt) - (RawMbBits * h->mb.i_mb_count * 3) + 1023) / 1024;
+ //printf("bin cnt = %d, min_num_bytes = %d, frame_size = %d, \n",h->stat.frame.i_bin_cnt, min_num_bytes, frame_size);
+ if(frame_size < min_num_bytes)
+ {
+ int stuffing_bytes = min_num_bytes - frame_size;
+ x264_nal_t *nal = &h->out.nal[h->out.i_nal-1];
+
+ /* If the required buffer size exceeds current allocated size, re-allocate the nal buffer.
+ Note that +2 guardband is due to insertion of cabac zero word with EPB (0x000003) in steps of 3 */
+ if( check_encapsulated_buffer( h, h->thread[0], h->out.i_nal, frame_size, (min_num_bytes+2)) < 0 )
+ return -1;
+
+ //printf ("Inserting %d/%d cabac_zero_word syntax elements/bytes (Clause 7.4.2.10)\n", ((stuffing_bytes + 2)/3), stuffing_bytes);
+ {
+ uint8_t *nal_buf = nal->p_payload + nal->i_payload;
+ int i;
+ for (i = 0; i < stuffing_bytes; i+=3 )
+ {
+ *nal_buf++ = 0x00; // CABAC zero word
+ *nal_buf++ = 0x00;
+ *nal_buf++ = 0x03;
+ }
+
+ nal->i_payload += i;
+ nal->i_padding += i;
+ frame_size += i;
+ }
+ }
+
+ return (frame_size);
+}
+#endif
+
/****************************************************************************
* x264_encoder_headers:
****************************************************************************/
@@ -2996,6 +3046,8 @@ cont:
{
x264_cabac_encode_flush( h, &h->cabac );
h->out.bs.p = h->cabac.p;
+
+ h->stat.frame.i_bin_cnt = h->cabac.i_bin_cnt;
}
else
{
@@ -3837,6 +3889,16 @@ static int encoder_frame_end( x264_t *h, x264_t *thread_current,
if( frame_size < 0 )
return -1;
+#if INSERT_CABAC_ZERO_WORD
+ if(h->param.b_cabac)
+ {
+ /* cabac zero word insertion; Clause 7.4.2.10 */
+ frame_size = encoder_insert_cabaczerowords(h, frame_size);
+ if( frame_size < 0 )
+ return -1;
+ }
+#endif
+
/* Set output picture properties */
pic_out->i_type = h->fenc->i_type;
--
2.13.0.windows.1
This is confidential Ittiam property.
More information about the x264-devel
mailing list