[x264-devel] [PATCH 25/32] Enable assembly templating for arm/aarch64 architectures
Vittorio Giovara
vittorio.giovara at gmail.com
Fri Jan 20 15:20:50 CET 2017
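Assemble each arm/aarch64 source once per bit depth (8bit/ and 10bit/ objects), and embed BIT_DEPTH in the EXTERN_ASM prefix (x264_8_ / x264_10_) to rename all internal asm symbols.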
---
Makefile | 24 ++++++++++++++++++++----
common/aarch64/asm.S | 13 +++++++------
common/arm/asm.S | 13 +++++++------
tools/asm.list | 31 +++++++++++++++++++++++++++++++
tools/duplicate-asm.sh | 3 ++-
5 files changed, 67 insertions(+), 17 deletions(-)
diff --git a/Makefile b/Makefile
index 4ce4c60..908599c 100644
--- a/Makefile
+++ b/Makefile
@@ -128,7 +128,10 @@ ASMSRC += common/arm/cpu-a.S common/arm/pixel-a.S common/arm/mc-a.S \
common/arm/dct-a.S common/arm/quant-a.S common/arm/deblock-a.S \
common/arm/predict-a.S common/arm/bitstream-a.S
SRCS += common/arm/mc-c.c common/arm/predict-c.c
-OBJASM = $(ASMSRC:%.S=%.o)
+
+OBJASM += $(ASMSRC:%.S=8bit/%.o)
+OBJASM += $(ASMSRC:%.S=10bit/%.o)
+
OBJCHK += tools/checkasm-arm.o
endif
endif
@@ -147,7 +150,10 @@ ASMSRC += common/aarch64/bitstream-a.S \
SRCS += common/aarch64/asm-offsets.c \
common/aarch64/mc-c.c \
common/aarch64/predict-c.c
-OBJASM = $(ASMSRC:%.S=%.o)
+
+OBJASM += $(ASMSRC:%.S=8bit/%.o)
+OBJASM += $(ASMSRC:%.S=10bit/%.o)
+
OBJCHK += tools/checkasm-aarch64.o
endif
endif
@@ -241,12 +247,12 @@ $(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJEXAMPLE): .depend
$(AS) $(ASFLAGS) -o $@ $<
-@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
-8bit/%.o: %.asm common/x86/x86inc.asm common/x86/x86util.asm
+8bit/%.o: %.asm common/x86/x86inc.asm common/x86/x86util.asm common/bitdepth.h common/bitdepth-asm.h
@mkdir -p $(dir $@)
$(AS) $(ASFLAGS) -o $@ $< -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8 -Dprivate_prefix=x264_8
-@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
-10bit/%.o: %.asm common/x86/x86inc.asm common/x86/x86util.asm
+10bit/%.o: %.asm common/x86/x86inc.asm common/x86/x86util.asm common/bitdepth.h common/bitdepth-asm.h
@mkdir -p $(dir $@)
$(AS) $(ASFLAGS) -o $@ $< -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10 -Dprivate_prefix=x264_10
-@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
@@ -255,6 +261,16 @@ $(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJEXAMPLE): .depend
$(AS) $(ASFLAGS) -o $@ $<
-@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
+8bit/%.o: %.S
+ @mkdir -p $(dir $@)
+ $(AS) $(ASFLAGS) -o $@ $< -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8
+ -@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
+
+10bit/%.o: %.S
+ @mkdir -p $(dir $@)
+ $(AS) $(ASFLAGS) -o $@ $< -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10
+ -@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
+
%.dll.o: %.rc x264.h
$(RC) $(RCFLAGS)$@ -DDLL $<
diff --git a/common/aarch64/asm.S b/common/aarch64/asm.S
index fff572a..6dd5a3a 100644
--- a/common/aarch64/asm.S
+++ b/common/aarch64/asm.S
@@ -27,12 +27,17 @@
#include "config.h"
+#define GLUE(a, b) a ## b
+#define JOIN(a, b) GLUE(a, b)
+
#ifdef PREFIX
-# define EXTERN_ASM _x264_
+# define EXTERN_ASM JOIN(JOIN(_x264_, BIT_DEPTH), _)
#else
-# define EXTERN_ASM x264_
+# define EXTERN_ASM JOIN(JOIN(x264_, BIT_DEPTH), _)
#endif
+#define X(s) JOIN(EXTERN_ASM, s)
+
#ifdef __ELF__
# define ELF
#else
@@ -94,10 +99,6 @@ MACH .const_data
#endif
.endm
-#define GLUE(a, b) a ## b
-#define JOIN(a, b) GLUE(a, b)
-#define X(s) JOIN(EXTERN_ASM, s)
-
#define FDEC_STRIDE 32
#define FENC_STRIDE 16
diff --git a/common/arm/asm.S b/common/arm/asm.S
index dbb3168..7e486b8 100644
--- a/common/arm/asm.S
+++ b/common/arm/asm.S
@@ -38,12 +38,17 @@
.fpu neon
+#define GLUE(a, b) a ## b
+#define JOIN(a, b) GLUE(a, b)
+
#ifdef PREFIX
-# define EXTERN_ASM _x264_
+# define EXTERN_ASM JOIN(JOIN(_x264_, BIT_DEPTH), _)
#else
-# define EXTERN_ASM x264_
+# define EXTERN_ASM JOIN(JOIN(x264_, BIT_DEPTH), _)
#endif
+#define X(s) JOIN(EXTERN_ASM, s)
+
#ifdef __ELF__
# define ELF
#else
@@ -164,10 +169,6 @@ ELF .size \name, . - \name
#endif
.endm
-#define GLUE(a, b) a ## b
-#define JOIN(a, b) GLUE(a, b)
-#define X(s) JOIN(EXTERN_ASM, s)
-
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
diff --git a/tools/asm.list b/tools/asm.list
index b6959a5..7787bdd 100644
--- a/tools/asm.list
+++ b/tools/asm.list
@@ -40,6 +40,7 @@ decimate_score16
decimate_score64
denoise_dct
dequant_4x4
+dequant_4x4_dc
dequant_4x4_flat16
dequant_4x4dc
dequant_8x8
@@ -94,9 +95,21 @@ mc_offsetsub_w4
mc_offsetsub_w8
mc_weight_w12
mc_weight_w16
+mc_weight_w16_nodenom
+mc_weight_w16_offsetadd
+mc_weight_w16_offsetsub
mc_weight_w20
+mc_weight_w20_nodenom
+mc_weight_w20_offsetadd
+mc_weight_w20_offsetsub
mc_weight_w4
+mc_weight_w4_nodenom
+mc_weight_w4_offsetadd
+mc_weight_w4_offsetsub
mc_weight_w8
+mc_weight_w8_nodenom
+mc_weight_w8_offsetadd
+mc_weight_w8_offsetsub
memcpy_aligned
memzero_aligned
nal_escape
@@ -137,6 +150,14 @@ pixel_sad_4x8
pixel_sad_8x16
pixel_sad_8x4
pixel_sad_8x8
+pixel_sad_aligned
+pixel_sad_aligned_16x16
+pixel_sad_aligned_16x8
+pixel_sad_aligned_4x4
+pixel_sad_aligned_4x8
+pixel_sad_aligned_8x16
+pixel_sad_aligned_8x4
+pixel_sad_aligned_8x8
pixel_sad_x3_16x16
pixel_sad_x3_16x8
pixel_sad_x3_4x4
@@ -187,21 +208,26 @@ predict_16x16_dc_left
predict_16x16_dc_top
predict_16x16_h
predict_16x16_init
+predict_16x16_p
predict_16x16_p_core
predict_16x16_v
predict_4x4_dc
+predict_4x4_dc_top
predict_4x4_ddl
predict_4x4_ddr
predict_4x4_h
predict_4x4_hd
predict_4x4_hu
predict_4x4_init
+predict_4x4_v
predict_4x4_vl
predict_4x4_vr
predict_8x16c_dc
+predict_8x16c_dc_left
predict_8x16c_dc_top
predict_8x16c_h
predict_8x16c_init
+predict_8x16c_p
predict_8x16c_p_core
predict_8x16c_v
predict_8x8_dc
@@ -218,11 +244,14 @@ predict_8x8_v
predict_8x8_vl
predict_8x8_vr
predict_8x8c_dc
+predict_8x8c_dc_left
predict_8x8c_dc_top
predict_8x8c_h
predict_8x8c_init
+predict_8x8c_p
predict_8x8c_p_core
predict_8x8c_v
+prefetch_fenc
prefetch_fenc_420
prefetch_fenc_422
prefetch_ref
@@ -254,3 +283,5 @@ zigzag_sub_4x4_field
zigzag_sub_4x4_frame
zigzag_sub_4x4ac_field
zigzag_sub_4x4ac_frame
+zigzag_sub_8x8_field
+zigzag_sub_8x8_frame
diff --git a/tools/duplicate-asm.sh b/tools/duplicate-asm.sh
index 81c3ed6..60b3ac9 100755
--- a/tools/duplicate-asm.sh
+++ b/tools/duplicate-asm.sh
@@ -13,7 +13,8 @@ ARCH_LIST="sse2_amd sse2_aligned sse2_lzcnt sse2slow sse2 \
ssse3_atom ssse3_aligned ssse3_cache64 ssse3_lzcnt ssse3 sse3 sse4 sse \
avx2_bmi2 avx2_lzcnt avx2 avx \
mmx2_lzcnt mmx2 mmx \
- asm atom fma4 xop"
+ asm atom fma4 xop \
+ neon_dual neon armv6 arm aarch64"
for var in $API_LIST; do
for arch in $ARCH_LIST; do
--
2.10.0
More information about the x264-devel mailing list