[x264-devel] aarch64: Simplify the decimate_score functions
Martin Storsjö
git at videolan.org
Sun Oct 11 19:01:02 CEST 2015
x264 | branch: master | Martin Storsjö <martin at martin.st> | Thu Aug 13 23:59:26 2015 +0300| [ef6034812162fc8b51bfd5e87387f405d1cc30cb] | committer: Henrik Gramner
aarch64: Simplify the decimate_score functions
After shifting a register left by the leading-zero count returned by
clz, only zero bits can have been shifted out, so if the register
was nonzero to start with (which is checked), it can't become
zero here.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=ef6034812162fc8b51bfd5e87387f405d1cc30cb
---
common/aarch64/quant-a.S | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/common/aarch64/quant-a.S b/common/aarch64/quant-a.S
index 2e1558b..443a91d 100644
--- a/common/aarch64/quant-a.S
+++ b/common/aarch64/quant-a.S
@@ -328,17 +328,13 @@ function x264_decimate_score\size\()_neon, export=1
lsr x6, x3, #2
lsl x1, x1, x3
ldrb w7, [x5, x6]
- cbz x1, 2f
lsl x1, x1, #4
add w0, w0, w7
cbnz x1, 1b
ret
-2:
- add w0, w0, w7
-0:
- ret
9:
mov w0, #9
+0:
ret
endfunc
.endm
@@ -399,17 +395,13 @@ function x264_decimate_score64_neon, export=1
clz x3, x1
lsl x1, x1, x3
ldrb w7, [x5, x3]
- cbz x1, 2f
lsl x1, x1, #1
add w0, w0, w7
cbnz x1, 1b
ret
-2:
- add w0, w0, w7
-0:
- ret
9:
mov w0, #9
+0:
ret
endfunc
More information about the x264-devel
mailing list