[x264-devel] [PATCH 23/23] aarch64: cabac_encode_{decision, bypass, terminal}_asm
Janne Grunau
janne-x264 at jannau.net
Thu Nov 27 08:56:51 CET 2014
benchmarks on a Nexus 9 (nvidia denver):
101.3 cycles in x264_cabac_encode_decision_c, 67105369 runs, 3495 skips
97.3 cycles in x264_cabac_encode_decision_asm, 67105493 runs, 3371 skips
132.8 cycles in x264_cabac_encode_terminal_c, 1046950 runs, 1626 skips
116.1 cycles in x264_cabac_encode_terminal_asm, 1048424 runs, 152 skips
92.4 cycles in x264_cabac_encode_bypass_c, 16776192 runs, 1024 skips
89.6 cycles in x264_cabac_encode_bypass_asm, 16776453 runs, 763 skips
Cycle counts are not as stable as one would like. The dynamic code
optimisation seems to produce different results for small changes in a
binary. Repeated runs with the same binary produce stable results
though (ignoring the first run).
---
Makefile | 4 +-
common/aarch64/asm-offsets.c | 43 +++++++++++++++
common/aarch64/asm-offsets.h | 37 +++++++++++++
common/aarch64/cabac-a.S | 122 +++++++++++++++++++++++++++++++++++++++++++
common/cabac.h | 4 ++
tools/checkasm.c | 2 +
6 files changed, 211 insertions(+), 1 deletion(-)
create mode 100644 common/aarch64/asm-offsets.c
create mode 100644 common/aarch64/asm-offsets.h
create mode 100644 common/aarch64/cabac-a.S
diff --git a/Makefile b/Makefile
index f293542..12c74e4 100644
--- a/Makefile
+++ b/Makefile
@@ -129,13 +129,15 @@ endif
ifeq ($(ARCH),AARCH64)
ifneq ($(AS),)
ASMSRC += common/aarch64/bitstream-a.S \
+ common/aarch64/cabac-a.S \
common/aarch64/dct-a.S \
common/aarch64/deblock-a.S \
common/aarch64/mc-a.S \
common/aarch64/pixel-a.S \
common/aarch64/predict-a.S \
common/aarch64/quant-a.S
-SRCS += common/aarch64/mc-c.c \
+SRCS += common/aarch64/asm-offsets.c \
+ common/aarch64/mc-c.c \
common/aarch64/predict-c.c
OBJASM = $(ASMSRC:%.S=%.o)
endif
diff --git a/common/aarch64/asm-offsets.c b/common/aarch64/asm-offsets.c
new file mode 100644
index 0000000..2fcf5a4
--- /dev/null
+++ b/common/aarch64/asm-offsets.c
@@ -0,0 +1,43 @@
+/*****************************************************************************
+ * aarch64/asm-offsets.c: check asm offsets for aarch64
+ *****************************************************************************
+ * Copyright (C) 2014 x264 project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#include <stdint.h>
+#include "common/osdep.h"
+#include "common/common.h"
+#include "common/cabac.h"
+#include "asm-offsets.h"
+/* Compile-time assertion that member m of struct type s lies at byte offset o. */
+#define X264_CHECK_OFFSET(s, m, o) struct check_##s##_##m { \
+ int m_##m[2 * (offsetof(s, m) == o) - 1]; /* array size is 1 on match, -1 (invalid) on mismatch */ \
+ }
+
+/* One check per CABAC_* offset constant defined in asm-offsets.h. */
+X264_CHECK_OFFSET(x264_cabac_t, i_low, CABAC_I_LOW);
+X264_CHECK_OFFSET(x264_cabac_t, i_range, CABAC_I_RANGE);
+X264_CHECK_OFFSET(x264_cabac_t, i_queue, CABAC_I_QUEUE);
+X264_CHECK_OFFSET(x264_cabac_t, i_bytes_outstanding, CABAC_I_BYTES_OUTSTANDING);
+X264_CHECK_OFFSET(x264_cabac_t, p_start, CABAC_P_START);
+X264_CHECK_OFFSET(x264_cabac_t, p, CABAC_P);
+X264_CHECK_OFFSET(x264_cabac_t, p_end, CABAC_P_END);
+X264_CHECK_OFFSET(x264_cabac_t, f8_bits_encoded, CABAC_F8_BITS_ENCODED);
+X264_CHECK_OFFSET(x264_cabac_t, state, CABAC_STATE);
diff --git a/common/aarch64/asm-offsets.h b/common/aarch64/asm-offsets.h
new file mode 100644
index 0000000..64d5193
--- /dev/null
+++ b/common/aarch64/asm-offsets.h
@@ -0,0 +1,37 @@
+/*****************************************************************************
+ * aarch64/asm-offsets.h: asm offsets for aarch64
+ *****************************************************************************
+ * Copyright (C) 2014 x264 project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#ifndef X264_AARCH64_ASM_OFFSETS_H
+#define X264_AARCH64_ASM_OFFSETS_H
+/* Byte offsets of x264_cabac_t members; asm-offsets.c checks each against offsetof() at compile time. */
+#define CABAC_I_LOW 0x00
+#define CABAC_I_RANGE 0x04
+#define CABAC_I_QUEUE 0x08
+#define CABAC_I_BYTES_OUTSTANDING 0x0c
+#define CABAC_P_START 0x10
+#define CABAC_P 0x18
+#define CABAC_P_END 0x20
+#define CABAC_F8_BITS_ENCODED 0x30
+#define CABAC_STATE 0x34
+
+#endif
diff --git a/common/aarch64/cabac-a.S b/common/aarch64/cabac-a.S
new file mode 100644
index 0000000..ffcbb92
--- /dev/null
+++ b/common/aarch64/cabac-a.S
@@ -0,0 +1,122 @@
+/*****************************************************************************
+ * cabac-a.S: aarch64 cabac
+ *****************************************************************************
+ * Copyright (C) 2014 x264 project
+ *
+ * Authors: Janne Grunau <janne-x264 at jannau.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#include "asm.S"
+#include "asm-offsets.h"
+
+// w11 holds x264_cabac_t.i_low
+// w12 holds x264_cabac_t.i_range
+// decision: x0 = x264_cabac_t base pointer, w1 = index into state[], w2 = bin value b
+function x264_cabac_encode_decision_asm, export=1
+ movrel x8, X(x264_cabac_range_lps)  // x8 = address of the x264_cabac_range_lps table
+ movrel x9, X(x264_cabac_transition)  // x9 = address of the x264_cabac_transition table
+ add w10, w1, #CABAC_STATE  // w10 = byte offset of state[w1] from x0
+ ldrb w3, [x0, x10] // i_state
+ ldr w12, [x0, #CABAC_I_RANGE]
+ and x4, x3, #~1  // i_state with bit 0 cleared
+ asr w5, w12, #6
+ add x8, x8, x4, lsl #1  // x8 += (i_state & ~1) << 1
+ sub w5, w5, #4  // w5 = (i_range >> 6) - 4
+ eor w6, w2, w3 // b ^ i_state
+ ldrb w4, [x8, x5] // i_range_lps
+ ldr w11, [x0, #CABAC_I_LOW]
+ sub w12, w12, w4  // i_range -= i_range_lps
+ tbz w6, #0, 1f // (b ^ i_state) & 1
+ add w11, w11, w12  // bit 0 differs: i_low += i_range
+ mov w12, w4  // ... and i_range = i_range_lps
+1:
+ orr w4, w2, w3, lsl #1  // transition index = (i_state << 1) | b
+ ldrb w9, [x9, x4]
+ strb w9, [x0, x10] // i_state
+
+cabac_encode_renorm:  // also entered from x264_cabac_encode_terminal_asm; expects w11/w12 as noted above
+ clz w5, w12
+ ldr w2, [x0, #CABAC_I_QUEUE]
+ sub w5, w5, #23  // shift = clz(i_range) - 23; puts the top set bit at bit 8 when i_range < 0x200
+ lsl w12, w12, w5
+ lsl w11, w11, w5
+2:  // NOTE(review): this local label appears unreferenced in the visible code
+ adds w2, w2, w5  // i_queue += shift, setting flags for the b.lt below
+ str w12, [x0, #CABAC_I_RANGE]
+ b.lt 0f  // signed result negative: only store i_low and i_queue
+cabac_putbyte:  // also entered from x264_cabac_encode_bypass_asm with w2 = i_queue, w11 = i_low
+ mov w13, #0x400
+ add w12, w2, #10  // w12 is reused as a shift count; i_range was already stored or is unchanged
+ lsl w13, w13, w2
+ asr w4, w11, w12 // out
+ sub w2, w2, #8  // i_queue -= 8
+ sub w13, w13, #1  // mask = (0x400 << old i_queue) - 1
+ subs w5, w4, #0xff  // sets flags: Z if out == 0xff (consumed by b.ne below)
+ and w11, w11, w13  // i_low &= mask
+ ldr w6, [x0, #CABAC_I_BYTES_OUTSTANDING]
+ str w2, [x0, #CABAC_I_QUEUE]
+ b.ne 1f  // out != 0xff: write bytes out
+
+ add w6, w6, #1  // out == 0xff: write nothing, count one more outstanding byte
+ str w11, [x0, #CABAC_I_LOW]
+ str w6, [x0, #CABAC_I_BYTES_OUTSTANDING]
+ ret
+
+1:
+ ldr x7, [x0, #CABAC_P]
+ asr w5, w4, #8 // carry
+ ldrb w8, [x7, #-1]
+ add w8, w8, w5  // add carry to the byte just before p
+ sub w5, w5, #1  // carry - 1; its low byte is written for each outstanding byte
+ strb w8, [x7, #-1]
+ cbz w6, 3f  // skip the loop when i_bytes_outstanding == 0
+2:
+ subs w6, w6, #1
+ strb w5, [x7], #1  // store one byte, then advance x7
+ b.gt 2b
+3:
+ strb w4, [x7], #1  // store the low 8 bits of out, then advance x7
+ str wzr, [x0, #CABAC_I_BYTES_OUTSTANDING]
+ str x7, [x0, #CABAC_P]
+0:
+ str w11, [x0, #CABAC_I_LOW]
+ str w2, [x0, #CABAC_I_QUEUE]
+ ret
+endfunc
+// bypass: x0 = x264_cabac_t base pointer, w1 = value ANDed with i_range before being added to i_low
+function x264_cabac_encode_bypass_asm, export=1
+ ldr w12, [x0, #CABAC_I_RANGE]
+ ldr w11, [x0, #CABAC_I_LOW]
+ ldr w2, [x0, #CABAC_I_QUEUE]
+ and w1, w1, w12  // NOTE(review): presumably callers pass w1 as 0 or an all-ones mask - confirm against the C version
+ add w11, w1, w11, lsl #1  // i_low = (i_low << 1) + (w1 & i_range)
+ adds w2, w2, #1  // i_queue += 1, setting flags
+ b.ge cabac_putbyte  // i_queue >= 0: continue at cabac_putbyte inside x264_cabac_encode_decision_asm
+ str w11, [x0, #CABAC_I_LOW]
+ str w2, [x0, #CABAC_I_QUEUE]
+ ret
+endfunc
+// terminal: x0 = x264_cabac_t base pointer; subtracts 2 from i_range, then shares the renorm code
+function x264_cabac_encode_terminal_asm, export=1
+ ldr w12, [x0, #CABAC_I_RANGE]
+ ldr w11, [x0, #CABAC_I_LOW]
+ sub w12, w12, #2  // i_range -= 2 (stored back by the renorm code)
+ b cabac_encode_renorm  // continue at cabac_encode_renorm inside x264_cabac_encode_decision_asm
+endfunc
diff --git a/common/cabac.h b/common/cabac.h
index dbe6820..cc27761 100644
--- a/common/cabac.h
+++ b/common/cabac.h
@@ -72,6 +72,10 @@ void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb );
#define x264_cabac_encode_decision x264_cabac_encode_decision_asm
#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
+#elif defined(ARCH_AARCH64)
+#define x264_cabac_encode_decision x264_cabac_encode_decision_asm
+#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
+#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
#else
#define x264_cabac_encode_decision x264_cabac_encode_decision_c
#define x264_cabac_encode_bypass x264_cabac_encode_bypass_c
diff --git a/tools/checkasm.c b/tools/checkasm.c
index b6a1140..08c1917 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -2432,6 +2432,8 @@ static void run_cabac_terminal_##cpu( x264_t *h, uint8_t *dst )\
DECL_CABAC(c)
#if HAVE_MMX
DECL_CABAC(asm)
+#elif defined(ARCH_AARCH64)
+DECL_CABAC(asm)
#else
#define run_cabac_decision_asm run_cabac_decision_c
#define run_cabac_bypass_asm run_cabac_bypass_c
--
2.1.3
More information about the x264-devel
mailing list