[x264-devel] [PATCH 23/32] Generate a header listing every assembly symbol

Vittorio Giovara vittorio.giovara at gmail.com
Fri Jan 20 15:20:48 CET 2017


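Equivalent to the C counterpart (common/bitdepth.h, generated by tools/duplicate.sh from tools/api.list), but for the assembly symbols listed in tools/asm.list.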
---
 .gitignore             |   1 +
 Makefile               |   4 +-
 common/common.h        |   1 +
 tools/asm.list         | 256 +++++++++++++++++++++++++++++++++++++++++++++++++
 tools/duplicate-asm.sh |  24 +++++
 5 files changed, 285 insertions(+), 1 deletion(-)
 create mode 100644 tools/asm.list
 create mode 100755 tools/duplicate-asm.sh

diff --git a/.gitignore b/.gitignore
index 5476ace..5ed2fd0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,5 +46,6 @@ checkasm
 dataDec.txt
 log.dec
 common/bitdepth.h
+common/bitdepth-asm.h
 common/oclobj.h
 x264_lookahead.clbin
diff --git a/Makefile b/Makefile
index 11858b3..b0516e9 100644
--- a/Makefile
+++ b/Makefile
@@ -180,7 +180,9 @@ endif
 
 common/bitdepth.h: $(SRCPATH)/tools/duplicate.sh $(SRCPATH)/tools/api.list
 	$(SRCPATH)/tools/duplicate.sh $(SRCPATH)/tools/api.list > $@
-HEADERS += common/bitdepth.h
+common/bitdepth-asm.h: $(SRCPATH)/tools/duplicate-asm.sh $(SRCPATH)/tools/asm.list
+	$(SRCPATH)/tools/duplicate-asm.sh $(SRCPATH)/tools/asm.list > $@
+HEADERS += common/bitdepth.h common/bitdepth-asm.h
 GENERATED += $(HEADERS)
 
 OBJS   += $(SRCSCOMMON:%.c=%.o)
diff --git a/common/common.h b/common/common.h
index bbbb5d9..587062f 100644
--- a/common/common.h
+++ b/common/common.h
@@ -28,6 +28,7 @@
 #define X264_COMMON_H
 
 #include "common/bitdepth.h"
+#include "common/bitdepth-asm.h"
 
 /****************************************************************************
  * Macros
diff --git a/tools/asm.list b/tools/asm.list
new file mode 100644
index 0000000..b6959a5
--- /dev/null
+++ b/tools/asm.list
@@ -0,0 +1,256 @@
+add16x16_idct
+add16x16_idct8
+add16x16_idct_dc
+add4x4_idct
+add8x8_idct
+add8x8_idct8
+add8x8_idct_dc
+cabac_block_residual_8x8_rd_internal
+cabac_block_residual_internal
+cabac_block_residual_rd_internal
+cabac_encode_bypass
+cabac_encode_decision
+cabac_encode_terminal
+coeff_last15
+coeff_last16
+coeff_last4
+coeff_last64
+coeff_last8
+coeff_level_run15
+coeff_level_run16
+coeff_level_run4
+coeff_level_run8
+dct2x4dc
+dct4x4dc
+deblock_h_chroma
+deblock_h_chroma_422
+deblock_h_chroma_422_intra
+deblock_h_chroma_intra
+deblock_h_chroma_intra_mbaff
+deblock_h_chroma_mbaff
+deblock_h_luma
+deblock_h_luma_intra
+deblock_strength
+deblock_v_chroma
+deblock_v_chroma_intra
+deblock_v_luma
+deblock_v_luma_intra
+decimate_score15
+decimate_score16
+decimate_score64
+denoise_dct
+dequant_4x4
+dequant_4x4_flat16
+dequant_4x4dc
+dequant_8x8
+dequant_8x8_flat16
+frame_init_lowres_core
+hpel_filter
+hpel_filter_c
+hpel_filter_h
+hpel_filter_v
+idct4x4dc
+idct_dequant_2x4_dc
+idct_dequant_2x4_dconly
+integral_init4h
+integral_init4v
+integral_init8h
+integral_init8v
+intra_sa8d_x3_8x8
+intra_sa8d_x9_8x8
+intra_sad_x3_16x16
+intra_sad_x3_4x4
+intra_sad_x3_8x16c
+intra_sad_x3_8x8
+intra_sad_x3_8x8c
+intra_sad_x9_4x4
+intra_sad_x9_8x8
+intra_satd_x3_16x16
+intra_satd_x3_4x4
+intra_satd_x3_8x16c
+intra_satd_x3_8x8c
+intra_satd_x9_4x4
+load_deinterleave_chroma_fdec
+load_deinterleave_chroma_fenc
+mbtree_fix8_pack
+mbtree_fix8_unpack
+mbtree_propagate_cost
+mbtree_propagate_list_internal
+mc_chroma
+mc_copy_w16
+mc_copy_w16_aligned
+mc_copy_w4
+mc_copy_w8
+mc_init
+mc_offsetadd_w12
+mc_offsetadd_w16
+mc_offsetadd_w20
+mc_offsetadd_w4
+mc_offsetadd_w8
+mc_offsetsub_w12
+mc_offsetsub_w16
+mc_offsetsub_w20
+mc_offsetsub_w4
+mc_offsetsub_w8
+mc_weight_w12
+mc_weight_w16
+mc_weight_w20
+mc_weight_w4
+mc_weight_w8
+memcpy_aligned
+memzero_aligned
+nal_escape
+optimize_chroma_2x2_dc
+pixel_ads1
+pixel_ads2
+pixel_ads4
+pixel_asd8
+pixel_avg2_w10
+pixel_avg2_w12
+pixel_avg2_w16
+pixel_avg2_w18
+pixel_avg2_w20
+pixel_avg2_w4
+pixel_avg2_w8
+pixel_avg_16x16
+pixel_avg_16x8
+pixel_avg_4x16
+pixel_avg_4x2
+pixel_avg_4x4
+pixel_avg_4x8
+pixel_avg_8x16
+pixel_avg_8x4
+pixel_avg_8x8
+pixel_avg_wtab
+pixel_hadamard_ac_16x16
+pixel_hadamard_ac_16x8
+pixel_hadamard_ac_8x16
+pixel_hadamard_ac_8x8
+pixel_sa8d_16x16
+pixel_sa8d_8x8
+pixel_sa8d_satd_16x16
+pixel_sad_16x16
+pixel_sad_16x8
+pixel_sad_4x16
+pixel_sad_4x4
+pixel_sad_4x8
+pixel_sad_8x16
+pixel_sad_8x4
+pixel_sad_8x8
+pixel_sad_x3_16x16
+pixel_sad_x3_16x8
+pixel_sad_x3_4x4
+pixel_sad_x3_4x8
+pixel_sad_x3_8x16
+pixel_sad_x3_8x4
+pixel_sad_x3_8x8
+pixel_sad_x4_16x16
+pixel_sad_x4_16x8
+pixel_sad_x4_4x4
+pixel_sad_x4_4x8
+pixel_sad_x4_8x16
+pixel_sad_x4_8x4
+pixel_sad_x4_8x8
+pixel_satd_16x16
+pixel_satd_16x8
+pixel_satd_4x16
+pixel_satd_4x4
+pixel_satd_4x8
+pixel_satd_8x16
+pixel_satd_8x4
+pixel_satd_8x8
+pixel_ssd_16x16
+pixel_ssd_16x8
+pixel_ssd_4x16
+pixel_ssd_4x4
+pixel_ssd_4x8
+pixel_ssd_8x16
+pixel_ssd_8x4
+pixel_ssd_8x8
+pixel_ssd_nv12_core
+pixel_ssim_4x4x2_core
+pixel_ssim_end4
+pixel_var2_8x16
+pixel_var2_8x8
+pixel_var_16x16
+pixel_var_8x16
+pixel_var_8x8
+pixel_vsad
+plane_copy_core
+plane_copy_deinterleave
+plane_copy_deinterleave_rgb
+plane_copy_deinterleave_v210
+plane_copy_interleave_core
+plane_copy_swap_core
+predict_16x16_dc
+predict_16x16_dc_left
+predict_16x16_dc_top
+predict_16x16_h
+predict_16x16_init
+predict_16x16_p_core
+predict_16x16_v
+predict_4x4_dc
+predict_4x4_ddl
+predict_4x4_ddr
+predict_4x4_h
+predict_4x4_hd
+predict_4x4_hu
+predict_4x4_init
+predict_4x4_vl
+predict_4x4_vr
+predict_8x16c_dc
+predict_8x16c_dc_top
+predict_8x16c_h
+predict_8x16c_init
+predict_8x16c_p_core
+predict_8x16c_v
+predict_8x8_dc
+predict_8x8_dc_left
+predict_8x8_dc_top
+predict_8x8_ddl
+predict_8x8_ddr
+predict_8x8_filter
+predict_8x8_h
+predict_8x8_hd
+predict_8x8_hu
+predict_8x8_init
+predict_8x8_v
+predict_8x8_vl
+predict_8x8_vr
+predict_8x8c_dc
+predict_8x8c_dc_top
+predict_8x8c_h
+predict_8x8c_init
+predict_8x8c_p_core
+predict_8x8c_v
+prefetch_fenc_420
+prefetch_fenc_422
+prefetch_ref
+quant_2x2_dc
+quant_4x4
+quant_4x4_dc
+quant_4x4x4
+quant_8x8
+store_interleave_chroma
+sub16x16_dct
+sub16x16_dct8
+sub4x4_dct
+sub8x16_dct_dc
+sub8x8_dct
+sub8x8_dct8
+sub8x8_dct_dc
+trellis_cabac_4x4
+trellis_cabac_4x4_psy
+trellis_cabac_8x8
+trellis_cabac_8x8_psy
+trellis_cabac_chroma_422_dc
+trellis_cabac_dc
+zigzag_interleave_8x8_cavlc
+zigzag_scan_4x4_field
+zigzag_scan_4x4_frame
+zigzag_scan_8x8_field
+zigzag_scan_8x8_frame
+zigzag_sub_4x4_field
+zigzag_sub_4x4_frame
+zigzag_sub_4x4ac_field
+zigzag_sub_4x4ac_frame
diff --git a/tools/duplicate-asm.sh b/tools/duplicate-asm.sh
new file mode 100755
index 0000000..81c3ed6
--- /dev/null
+++ b/tools/duplicate-asm.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+# Read the list of assembly symbols to rename from the file given as $1 and
+# print a header with one bit-depth-prefixed #define per symbol/suffix pair.
+echo "#ifndef COMMON_BITDEPTH_ASM_H"
+echo "#define COMMON_BITDEPTH_ASM_H"
+echo "#define x264_glue_expand(x,y,z) x##y##z"
+echo "#define x264_glue(x,y,z) x264_glue_expand(x,y,z)"
+# Quote the path and fail loudly: an unreadable list must not yield exit 0.
+API_LIST=$(cat "$1") || exit 1
+ARCH_LIST="sse2_amd sse2_aligned sse2_lzcnt sse2slow sse2 \
+           cache64_ssse3_atom cache64_ssse3 cache64_mmx2 cache64_sse2 cache64 \
+           ssse3_atom ssse3_aligned ssse3_cache64 ssse3_lzcnt ssse3 sse3 sse4 sse \
+           avx2_bmi2 avx2_lzcnt avx2 avx \
+           mmx2_lzcnt mmx2 mmx \
+           asm atom fma4 xop"
+
+for var in $API_LIST; do
+    for arch in $ARCH_LIST; do
+        echo "#define x264_${var}_${arch} x264_glue(x264_,BIT_DEPTH,_${var}_${arch})"
+    done
+done
+
+echo "#endif"
-- 
2.10.0



More information about the x264-devel mailing list