[x264-devel] [PATCH] Added support for CABAC zero bytes insertion
Jay N. Shingala
jay.shingala at ittiam.com
Fri Oct 26 13:20:20 CEST 2018
>From 658dc3f6af9fb2dd4d776b4afb70dae1110fd5d9 Mon Sep 17 00:00:00 2001
From: "Jay N. Shingala" <100153 at ittiam.com>
Date: Tue, 11 Sep 2018 21:03:40 +0530
Subject: [PATCH] Added support for cabac zero bytes insertion
The following changes add cabac_zero_word (zero byte) insertion to meet the constraints of clause 7.4.2.10 of the H.264 specification.
- Added "i_bin_cnt" to "x264_cabac_t" for counting total bins
i_bin_cnt is unconditionally incremented in all CABAC decision, bypass and terminal encode functions (both C and ASM)
- Added "i_bin_cnt" to "x264_frame_stat_t" as part of frame stats
Gets assigned from cabac structure to frame structure at the end of slice
It is recommended not to move i_bin_cnt within this structure: its current position ensures that the bin counter is accumulated across multiple slices at the end of the frame (in threaded_slices_write()).
- Added macro "INSERT_CABAC_ZERO_WORD" to insert cabac zero bytes
"encoder_insert_cabaczerowords()" is the new function which adds cabac zero bytes as per clause 7.4.2.10 at the end of frame
---
common/aarch64/asm-offsets.c | 1 +
common/aarch64/asm-offsets.h | 1 +
common/aarch64/cabac-a.S | 10 +++++++
common/cabac.c | 6 +++++
common/cabac.h | 3 +++
common/common.h | 2 ++
common/x86/cabac-a.asm | 20 ++++++++++++--------
encoder/encoder.c | 62 ++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 97 insertions(+), 8 deletions(-)
diff --git a/common/aarch64/asm-offsets.c b/common/aarch64/asm-offsets.c
index db88e9c4..81bd4f8f 100644
--- a/common/aarch64/asm-offsets.c
+++ b/common/aarch64/asm-offsets.c
@@ -38,5 +38,6 @@ X264_CHECK_OFFSET(x264_cabac_t, i_bytes_outstanding, CABAC_I_BYTES_OUTSTANDING);
X264_CHECK_OFFSET(x264_cabac_t, p_start, CABAC_P_START);
X264_CHECK_OFFSET(x264_cabac_t, p, CABAC_P);
X264_CHECK_OFFSET(x264_cabac_t, p_end, CABAC_P_END);
+X264_CHECK_OFFSET(x264_cabac_t, i_bin_cnt, CABAC_BIN_COUNT);
X264_CHECK_OFFSET(x264_cabac_t, f8_bits_encoded, CABAC_F8_BITS_ENCODED);
X264_CHECK_OFFSET(x264_cabac_t, state, CABAC_STATE);
diff --git a/common/aarch64/asm-offsets.h b/common/aarch64/asm-offsets.h
index 81184de1..c19655ff 100644
--- a/common/aarch64/asm-offsets.h
+++ b/common/aarch64/asm-offsets.h
@@ -33,6 +33,7 @@
#define CABAC_P_START 0x10
#define CABAC_P 0x18
#define CABAC_P_END 0x20
+#define CABAC_BIN_COUNT 0x28
#define CABAC_F8_BITS_ENCODED 0x30
#define CABAC_STATE 0x34
diff --git a/common/aarch64/cabac-a.S b/common/aarch64/cabac-a.S
index 9abb14b2..0346bed3 100644
--- a/common/aarch64/cabac-a.S
+++ b/common/aarch64/cabac-a.S
@@ -28,6 +28,7 @@
// w11 holds x264_cabac_t.i_low
// w12 holds x264_cabac_t.i_range
+// w7 holds x264_cabac_t.i_bin_cnt
function cabac_encode_decision_asm, export=1
movrel x8, X264(cabac_range_lps)
@@ -35,6 +36,7 @@ function cabac_encode_decision_asm, export=1
add w10, w1, #CABAC_STATE
ldrb w3, [x0, x10] // i_state
ldr w12, [x0, #CABAC_I_RANGE]
+ ldr w7, [x0, #CABAC_BIN_COUNT]
and x4, x3, #~1
asr w5, w12, #6
add x8, x8, x4, lsl #1
@@ -42,7 +44,9 @@ function cabac_encode_decision_asm, export=1
eor w6, w2, w3 // b ^ i_state
ldrb w4, [x8, x5] // i_range_lps
ldr w11, [x0, #CABAC_I_LOW]
+adds w7, w7, #1 // i_bin_cnt += 1
sub w12, w12, w4
+str w7, [x0, #CABAC_BIN_COUNT]
tbz w6, #0, 1f // (b ^ i_state) & 1
add w11, w11, w12
mov w12, w4
@@ -103,11 +107,14 @@ endfunc
function cabac_encode_bypass_asm, export=1
ldr w12, [x0, #CABAC_I_RANGE]
+ ldr w7, [x0, #CABAC_BIN_COUNT]
ldr w11, [x0, #CABAC_I_LOW]
ldr w2, [x0, #CABAC_I_QUEUE]
and w1, w1, w12
add w11, w1, w11, lsl #1
+ adds w7, w7, #1 // i_bin_cnt += 1
adds w2, w2, #1
+ str w7, [x0, #CABAC_BIN_COUNT]
b.ge cabac_putbyte
str w11, [x0, #CABAC_I_LOW]
str w2, [x0, #CABAC_I_QUEUE]
@@ -116,7 +123,10 @@ endfunc
function cabac_encode_terminal_asm, export=1
ldr w12, [x0, #CABAC_I_RANGE]
+ ldr w7, [x0, #CABAC_BIN_COUNT]
ldr w11, [x0, #CABAC_I_LOW]
sub w12, w12, #2
+ adds w7, w7, #1 // i_bin_cnt += 1
+ str w7, [x0, #CABAC_BIN_COUNT]
b cabac_encode_renorm
endfunc
diff --git a/common/cabac.c b/common/cabac.c
index 8c3e72af..f2f2b01d 100644
--- a/common/cabac.c
+++ b/common/cabac.c
@@ -56,6 +56,7 @@ void x264_cabac_encode_init_core( x264_cabac_t *cb )
cb->i_range = 0x01FE;
cb->i_queue = -9; // the first bit will be shifted away and not written
cb->i_bytes_outstanding = 0;
+ cb->i_bin_cnt = 0;
}
void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end )
@@ -122,6 +123,7 @@ void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b )
}
cb->state[i_ctx] = x264_cabac_transition[i_state][b];
cabac_encode_renorm( cb );
+ cb->i_bin_cnt++;
}
/* Note: b is negated for this function */
@@ -131,6 +133,7 @@ void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b )
cb->i_low += b & cb->i_range;
cb->i_queue += 1;
cabac_putbyte( cb );
+ cb->i_bin_cnt++;
}
static const int bypass_lut[16] =
@@ -146,6 +149,8 @@ void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val )
uint32_t x = (bypass_lut[k-exp_bits]<<exp_bits) + v;
k = 2*k+1-exp_bits;
int i = ((k-1)&7)+1;
+
+ cb->i_bin_cnt += k;
do {
k -= i;
cb->i_low <<= i;
@@ -160,6 +165,7 @@ void x264_cabac_encode_terminal_c( x264_cabac_t *cb )
{
cb->i_range -= 2;
cabac_encode_renorm( cb );
+ cb->i_bin_cnt++;
}
void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb )
diff --git a/common/cabac.h b/common/cabac.h
index b573416e..a29a8763 100644
--- a/common/cabac.h
+++ b/common/cabac.h
@@ -37,10 +37,13 @@ typedef struct
int i_queue; //stored with an offset of -8 for faster asm
int i_bytes_outstanding;
+
uint8_t *p_start;
uint8_t *p;
uint8_t *p_end;
+ int i_bin_cnt; // bin counter needed for cabac zero word padding of access unit
+
/* aligned for memcpy_aligned starting here */
ALIGNED_64( int f8_bits_encoded ); // only if using x264_cabac_size_decision()
diff --git a/common/common.h b/common/common.h
index 320206ef..febe11c3 100644
--- a/common/common.h
+++ b/common/common.h
@@ -259,6 +259,8 @@ typedef struct
int i_mb_field[3];
/* Adaptive direct mv pred */
int i_direct_score[2];
+ /* bin counter needed for cabac zero word padding of access unit */
+ int i_bin_cnt;
/* Metrics */
int64_t i_ssd[3];
double f_ssim;
diff --git a/common/x86/cabac-a.asm b/common/x86/cabac-a.asm
index fcafd9c4..ae2d905c 100644
--- a/common/x86/cabac-a.asm
+++ b/common/x86/cabac-a.asm
@@ -115,6 +115,7 @@ struc cb
.start: pointer 1
.p: pointer 1
.end: pointer 1
+ .i_bin_cnt: resd 1
align 64, resb 1
.bits_encoded: resd 1
.state: resb 1024
@@ -143,12 +144,13 @@ endstruc
DECLARE_REG_TMP 0,4,2,1,3,5,6,2
%endif
-cglobal cabac_encode_decision_%1, 1,7
+cglobal cabac_encode_decision_%1, 1,7
movifnidn t1d, r1m
mov t5d, [r0+cb.range]
movzx t6d, byte [r0+cb.state+t1]
+ inc dword [r0+cb.i_bin_cnt]
movifnidn t0, r0 ; WIN64
- mov t4d, ~1
+ mov t4d, ~1
mov t3d, t5d
and t4d, t6d
shr t5d, 6
@@ -193,13 +195,14 @@ cglobal cabac_encode_decision_%1, 1,7
mov [t0+cb.queue], t3d
RET
-cglobal cabac_encode_bypass_%1, 2,3
+cglobal cabac_encode_bypass_%1, 2,3
mov t7d, [r0+cb.low]
and r1d, [r0+cb.range]
- lea t7d, [t7*2+r1]
- movifnidn t0, r0 ; WIN64
- mov t3d, [r0+cb.queue]
- inc t3d
+ inc dword [r0+cb.i_bin_cnt]
+ lea t7d, [t7*2+r1]
+ movifnidn t0, r0 ; WIN64
+ mov t3d, [r0+cb.queue]
+ inc t3d
%if ARCH_X86_64 ; .putbyte compiles to nothing but a jmp
jge cabac_putbyte_%1
%else
@@ -217,7 +220,8 @@ cglobal cabac_encode_bypass_%1, 2,3
%ifnidn %1,bmi2
cglobal cabac_encode_terminal_%1, 1,3
- sub dword [r0+cb.range], 2
+ inc dword [r0+cb.i_bin_cnt]
+ sub dword [r0+cb.range], 2
; shortcut: the renormalization shift in terminal
; can only be 0 or 1 and is zero over 99% of the time.
test dword [r0+cb.range], 0x100
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 7316a586..bb3f8d67 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -37,6 +37,7 @@
#endif
//#define DEBUG_MB_TYPE
+#define INSERT_CABAC_ZERO_WORD 1
#define bs_write_ue bs_write_ue_big
@@ -2005,6 +2006,55 @@ static int encoder_encapsulate_nals( x264_t *h, int start )
return nal_buffer - (h0->nal_buffer + previous_nal_size);
}
+#if INSERT_CABAC_ZERO_WORD
+/* Append cabac_zero_word padding (emitted as 0x000003) to the last NAL when the frame is smaller
+ * than clause 7.4.2.10 requires. Returns the resulting frame size in bytes, or -1 on failure. */
+static int encoder_insert_cabaczerowords( x264_t *h, int frame_size )
+{
+    int64_t RawMbBits = 256 * h->param.i_bitdepth;
+    int i_chroma_format_idc = h->sps->i_chroma_format_idc;
+
+    if( i_chroma_format_idc == CHROMA_420 )
+        RawMbBits = (RawMbBits * 3) / 2;
+    else if( i_chroma_format_idc == CHROMA_422 )
+        RawMbBits = (RawMbBits * 2);
+    else if( i_chroma_format_idc == CHROMA_444 )
+        RawMbBits = (RawMbBits * 3);
+
+    /* Check for cabac zero word stuffing based on output frame size, bin count and raw bits as per section 7.4.2.10
+       Also see section 9.3.4.6 Byte stuffing process.
+       Evaluated in 64 bits: 96 * i_bin_cnt and RawMbBits * i_mb_count * 3 can both exceed the range of int. */
+    int64_t min_num_bytes = ((96 * (int64_t)h->stat.frame.i_bin_cnt) - (RawMbBits * h->mb.i_mb_count * 3) + 1023) / 1024;
+    if( frame_size < min_num_bytes )
+    {
+        /* i_bin_cnt is an int, so min_num_bytes is at most about 96 * INT_MAX / 1024 and fits in int. */
+        int stuffing_bytes = (int)(min_num_bytes - frame_size);
+        x264_nal_t *nal = &h->out.nal[h->out.i_nal-1];
+
+        /* If the required buffer size exceeds current allocated size, re-allocate the nal buffer.
+           Note that +2 guardband is due to insertion of cabac zero word with EPB (0x000003) in steps of 3 */
+        if( check_encapsulated_buffer( h, h->thread[0], h->out.i_nal, frame_size, (int)min_num_bytes + 2 ) < 0 )
+            return -1;
+
+        uint8_t *nal_buf = nal->p_payload + nal->i_payload;
+        int i;
+        for( i = 0; i < stuffing_bytes; i += 3 )
+        {
+            *nal_buf++ = 0x00; // CABAC zero word
+            *nal_buf++ = 0x00;
+            *nal_buf++ = 0x03; // emulation prevention byte
+        }
+
+        /* i is stuffing_bytes rounded up to a whole number of 3-byte words */
+        nal->i_payload += i;
+        nal->i_padding += i;
+        frame_size += i;
+    }
+
+    return frame_size;
+}
+#endif
+
/****************************************************************************
* x264_encoder_headers:
****************************************************************************/
@@ -2996,6 +3046,8 @@ cont:
{
x264_cabac_encode_flush( h, &h->cabac );
h->out.bs.p = h->cabac.p;
+
+ h->stat.frame.i_bin_cnt = h->cabac.i_bin_cnt;
}
else
{
@@ -3837,6 +3889,16 @@ static int encoder_frame_end( x264_t *h, x264_t *thread_current,
if( frame_size < 0 )
return -1;
+#if INSERT_CABAC_ZERO_WORD
+ if(h->param.b_cabac)
+ {
+ /* cabac zero word insertion; Clause 7.4.2.10 */
+ frame_size = encoder_insert_cabaczerowords(h, frame_size);
+ if( frame_size < 0 )
+ return -1;
+ }
+#endif
+
/* Set output picture properties */
pic_out->i_type = h->fenc->i_type;
--
2.13.0.windows.1
This is confidential Ittiam property.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-cabac-zerobytes.patch
Type: application/octet-stream
Size: 12281 bytes
Desc: 0001-cabac-zerobytes.patch
URL: <http://mailman.videolan.org/pipermail/x264-devel/attachments/20181026/6a8321f6/attachment-0001.obj>
More information about the x264-devel
mailing list