[x264-devel] commit: Compile fixes for pre-ARMv6T2 and/or PIC (David Conrad)
git version control
git at videolan.org
Sun Sep 6 19:37:00 CEST 2009
x264 | branch: master | David Conrad <lessen42 at gmail.com> | Wed Sep 2 16:14:59 2009 -0700| [e390cbf993d180b1db413746272e232ac3068dad] | committer: Jason Garrett-Glaser
Compile fixes for pre-ARMv6T2 and/or PIC
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=e390cbf993d180b1db413746272e232ac3068dad
---
common/arm/asm.S | 11 +++++++++++
common/arm/deblock-a.S | 2 ++
common/arm/mc-a.S | 1 +
common/arm/pixel-a.S | 11 ++---------
common/arm/predict-a.S | 4 +---
common/arm/quant-a.S | 2 +-
configure | 10 +++++++---
7 files changed, 25 insertions(+), 16 deletions(-)
diff --git a/common/arm/asm.S b/common/arm/asm.S
index cbb2806..f7b9f14 100644
--- a/common/arm/asm.S
+++ b/common/arm/asm.S
@@ -46,6 +46,17 @@
#endif
.endm
+.macro movconst rd, val
+#ifdef HAVE_ARMV6T2
+ movw \rd, #:lower16:\val
+.if \val >> 16
+ movt \rd, #:upper16:\val
+.endif
+#else
+ ldr \rd, =\val
+#endif
+.endm
+
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
diff --git a/common/arm/deblock-a.S b/common/arm/deblock-a.S
index da1a316..6d60242 100644
--- a/common/arm/deblock-a.S
+++ b/common/arm/deblock-a.S
@@ -22,6 +22,8 @@
#include "asm.S"
+.fpu neon
+
.macro h264_loop_filter_start
ldr ip, [sp]
ldr ip, [ip]
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 33b8dad..afd881c 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -135,6 +135,7 @@ memcpy_table:
.word memcpy_aligned_8_8_neon
.text
+.ltorg
// void x264_memzero_aligned( void *dst, size_t n )
function x264_memzero_aligned_neon, export=1
diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
index d985487..effe939 100644
--- a/common/arm/pixel-a.S
+++ b/common/arm/pixel-a.S
@@ -200,12 +200,6 @@ SAD_FUNC 16, 16, _aligned, ,:128
.macro SAD_FUNC_DUAL w, h
function x264_pixel_sad_aligned_\w\()x\h\()_neon_dual, export=1
-.if \w == 16
- .set r, \h / 2 - 2
-.else
- .set r, \h / 2 - 1
-.endif
-
SAD_DUAL_START_\w
.rept \h / 2 - \w / 8
SAD_DUAL_\w
@@ -1209,9 +1203,8 @@ function x264_pixel_ssim_end4_neon, export=1
vshl.s32 q2, q2, #6
vadd.s32 q1, q8, q8
- mov r3, #416 // ssim_c1= .01*.01*255*255*64
- movw ip, #39355 // ssim_c2= .03*.03*255*255*64*63 - 3<<16
- movt ip, #3
+ mov r3, #416 // ssim_c1 = .01*.01*255*255*64
+ movconst ip, 235963 // ssim_c2 = .03*.03*255*255*64*63
vdup.32 q14, r3
vdup.32 q15, ip
diff --git a/common/arm/predict-a.S b/common/arm/predict-a.S
index 46e687b..8ff61a2 100644
--- a/common/arm/predict-a.S
+++ b/common/arm/predict-a.S
@@ -102,7 +102,7 @@ function x264_predict_4x4_ddr_armv6, export=1
add r4, r4, r3, lsl #8
add r5, r5, r4, lsl #8
add r6, r6, r5, lsl #8
- ldr ip, pb_1
+ ldr ip, =0x01010101
PRED4x4_LOWPASS r1, r2, r3, r4, r5, r6, ip
str r1, [r0, #0*FDEC_STRIDE]
lsl r2, r1, #8
@@ -118,8 +118,6 @@ function x264_predict_4x4_ddr_armv6, export=1
pop {r4-r6,pc}
.endfunc
-pb_1: .word 0x01010101
-
function x264_predict_4x4_ddl_neon, export=1
sub r0, #FDEC_STRIDE
mov ip, #FDEC_STRIDE
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index 81ec1b1..f348e5f 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -124,7 +124,7 @@ function x264_quant_8x8_neon, export=1
.endfunc
.macro DEQUANT_START mf_size offset dc=no
- movw r3, #0x2b
+ mov r3, #0x2b
mul r3, r3, r2
lsr r3, r3, #8 // i_qbits = i_qp / 6
add ip, r3, r3, lsl #1
diff --git a/configure b/configure
index 767570a..c39f67c 100755
--- a/configure
+++ b/configure
@@ -305,7 +305,7 @@ esac
cc_check || die "No working C compiler found."
-if [ $shared = yes -a \( $ARCH = "X86_64" -o $ARCH = "PPC" -o $ARCH = "ALPHA" \) ] ; then
+if [ $shared = yes -a \( $ARCH = "X86_64" -o $ARCH = "PPC" -o $ARCH = "ALPHA" -o $ARCH = "ARM" \) ] ; then
pic="yes"
fi
@@ -332,12 +332,12 @@ if [ $asm = yes -a $ARCH = ARM ] ; then
# set flags so neon is built by default
echo $CFLAGS | grep -Eq '(-mcpu|-march|-mfpu|-mfloat-abi)' || CFLAGS="$CFLAGS -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp"
- if cc_check '' '' 'asm("rev r0, r0");' ; then CFLAGS="$CFLAGS -DHAVE_ARMV6"
+ if cc_check '' '' 'asm("rev ip, ip");' ; then CFLAGS="$CFLAGS -DHAVE_ARMV6"
cc_check '' '' 'asm("movt r0, #0");' && CFLAGS="$CFLAGS -DHAVE_ARMV6T2"
cc_check '' '' 'asm("vadd.i16 q0, q0, q0");' && CFLAGS="$CFLAGS -DHAVE_NEON"
ASFLAGS="$ASFLAGS $CFLAGS -c"
else
- echo "You specified a pre-ARMv6 CPU in your CFLAGS."
+ echo "You specified a pre-ARMv6 or Thumb-1 CPU in your CFLAGS."
echo "If you really want to run on such a CPU, configure with --disable-asm."
exit 1
fi
@@ -437,6 +437,10 @@ fi
if [ "$debug" = "yes" ]; then
CFLAGS="-O1 -g $CFLAGS"
+elif [ $ARCH = ARM ]; then
+ # arm-gcc-4.2 produces incorrect output with -ffast-math
+ # and it doesn't save any speed anyway on 4.4, so disable it
+ CFLAGS="-O4 -fno-fast-math $CFLAGS"
else
CFLAGS="-O4 -ffast-math $CFLAGS"
fi
More information about the x264-devel mailing list