[x264-devel] x86util: Reduce code size of high bit-depth AVX LOAD_DIFF
Henrik Gramner
git at videolan.org
Tue Jan 24 21:14:12 CET 2017
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sun Jan 15 14:52:29 2017 +0100| [3c7bf52c5b0a849458a45b5628ed1cc4b898da5f] | committer: Henrik Gramner
x86util: Reduce code size of high bit-depth AVX LOAD_DIFF
AVX supports unaligned memory operands, which makes the SATD code a bit denser.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=3c7bf52c5b0a849458a45b5628ed1cc4b898da5f
---
common/x86/x86util.asm | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/common/x86/x86util.asm b/common/x86/x86util.asm
index c285f13..ea40bc8 100644
--- a/common/x86/x86util.asm
+++ b/common/x86/x86util.asm
@@ -741,25 +741,25 @@
%if %6 ; %5 aligned?
mova %1, %4
psubw %1, %5
+%elif cpuflag(avx)
+ movu %1, %4
+ psubw %1, %5
%else
movu %1, %4
movu %2, %5
psubw %1, %2
%endif
%else ; !HIGH_BIT_DEPTH
-%ifidn %3, none
movh %1, %4
movh %2, %5
+%ifidn %3, none
punpcklbw %1, %2
punpcklbw %2, %2
- psubw %1, %2
%else
- movh %1, %4
punpcklbw %1, %3
- movh %2, %5
punpcklbw %2, %3
- psubw %1, %2
%endif
+ psubw %1, %2
%endif ; HIGH_BIT_DEPTH
%endmacro
More information about the x264-devel mailing list