[x264-devel] x86util: Reduce code size of high bit-depth AVX LOAD_DIFF

Henrik Gramner git at videolan.org
Tue Jan 24 21:14:12 CET 2017


x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sun Jan 15 14:52:29 2017 +0100| [3c7bf52c5b0a849458a45b5628ed1cc4b898da5f] | committer: Henrik Gramner

x86util: Reduce code size of high bit-depth AVX LOAD_DIFF

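AVX supports unaligned memory operands, which makes the SATD code a bit denser: the second source can be used directly as the memory operand of psubw instead of first being loaded into a temporary register with movu.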

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=3c7bf52c5b0a849458a45b5628ed1cc4b898da5f
---

 common/x86/x86util.asm | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/common/x86/x86util.asm b/common/x86/x86util.asm
index c285f13..ea40bc8 100644
--- a/common/x86/x86util.asm
+++ b/common/x86/x86util.asm
@@ -741,25 +741,25 @@
 %if %6 ; %5 aligned?
     mova       %1, %4
     psubw      %1, %5
+%elif cpuflag(avx)
+    movu       %1, %4
+    psubw      %1, %5
 %else
     movu       %1, %4
     movu       %2, %5
     psubw      %1, %2
 %endif
 %else ; !HIGH_BIT_DEPTH
-%ifidn %3, none
     movh       %1, %4
     movh       %2, %5
+%ifidn %3, none
     punpcklbw  %1, %2
     punpcklbw  %2, %2
-    psubw      %1, %2
 %else
-    movh       %1, %4
     punpcklbw  %1, %3
-    movh       %2, %5
     punpcklbw  %2, %3
-    psubw      %1, %2
 %endif
+    psubw      %1, %2
 %endif ; HIGH_BIT_DEPTH
 %endmacro
 



More information about the x264-devel mailing list