[x265] [PATCH 1 of 2] asm_arm: improve interp_8tap_vert_pp_4xN by (1) removing unnecessary pld cache-prefetch instructions and (2) replacing register r6 with lr
Min Chen
chenm003 at 163.com
Wed May 18 18:42:34 CEST 2016
# HG changeset patch
# User Min Chen <min.chen at multicorewareinc.com>
# Date 1463589738 18000
# Node ID 46c45f236ab0b25ec92a892f12315024eae2a11d
# Parent 28cf9adfc82e3816189b26aaeb907393b2a82ed8
asm_arm: improve interp_8tap_vert_pp_4xN by (1) removing unnecessary pld cache-prefetch instructions and (2) replacing register r6 with lr
Original (columns: speedup, asm time, reference time; speedup = reference time / asm time):
luma_vpp[ 4x4] 1.87x 45.23 84.41
luma_vpp[ 4x8] 2.10x 70.36 147.78
luma_vpp[ 4x16] 2.25x 121.24 272.18
Optimized (same columns):
luma_vpp[ 4x4] 1.98x 42.42 84.02
luma_vpp[ 4x8] 2.32x 63.70 147.49
luma_vpp[ 4x16] 2.51x 108.39 272.18
---
source/common/arm/ipfilter8.S | 31 +++++++++++--------------------
1 files changed, 11 insertions(+), 20 deletions(-)
diff -r 28cf9adfc82e -r 46c45f236ab0 source/common/arm/ipfilter8.S
--- a/source/common/arm/ipfilter8.S Wed May 18 02:01:34 2016 +0000
+++ b/source/common/arm/ipfilter8.S Wed May 18 11:42:18 2016 -0500
@@ -711,7 +711,7 @@
//**************luma_vpp************
.macro LUMA_VPP_4xN h
function x265_interp_8tap_vert_pp_4x\h\()_neon
- push {r4, r5, r6}
+ push {r4, r5, lr}
ldr r4, [sp, #4 * 3]
mov r5, r4, lsl #6
mov r4, r1, lsl #2
@@ -725,24 +725,16 @@
.loop_4x\h:
movrel r12, g_lumaFilter
add r12, r5
- mov r6, r0
+ mov lr, r0
- pld [r6]
- vld1.u32 d0[0], [r6], r1
- pld [r6]
- vld1.u32 d0[1], [r6], r1
- pld [r6]
- vld1.u32 d1[0], [r6], r1
- pld [r6]
- vld1.u32 d1[1], [r6], r1
- pld [r6]
- vld1.u32 d2[0], [r6], r1
- pld [r6]
- vld1.u32 d2[1], [r6], r1
- pld [r6]
- vld1.u32 d3[0], [r6], r1
- pld [r6]
- vld1.u32 d3[1], [r6], r1
+ vld1.u32 d0[0], [lr], r1
+ vld1.u32 d0[1], [lr], r1
+ vld1.u32 d1[0], [lr], r1
+ vld1.u32 d1[1], [lr], r1
+ vld1.u32 d2[0], [lr], r1
+ vld1.u32 d2[1], [lr], r1
+ vld1.u32 d3[0], [lr], r1
+ vld1.u32 d3[1], [lr], r1
veor.u8 q9, q9
@@ -795,8 +787,7 @@
subs r4, #1
bne .loop_4x\h
- pop {r4, r5, r6}
- bx lr
+ pop {r4, r5, pc}
.ltorg
endfunc
.endm
More information about the x265-devel
mailing list