[x264-devel] x86: Utilize 3-arg instructions in AVX deblock

Henrik Gramner git at videolan.org
Mon May 22 00:02:14 CEST 2017


x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Thu Apr 20 21:58:23 2017 +0200| [aaa9aa83a111ed6f1db253d5afa91c5fc844583f] | committer: Henrik Gramner

x86: Utilize 3-arg instructions in AVX deblock

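Avoids some redundant register-register moves: the three-operand form of
punpckhdq writes its result to a separate destination register, so the
preceding mova copy of the first source is no longer needed.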

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=aaa9aa83a111ed6f1db253d5afa91c5fc844583f
---

 common/x86/deblock-a.asm | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm
index 9790fd26..abebafad 100644
--- a/common/x86/deblock-a.asm
+++ b/common/x86/deblock-a.asm
@@ -906,9 +906,8 @@ DEBLOCK_LUMA_INTRA
     movq       m3, %4
     punpcklwd  m0, m2
     punpcklwd  m1, m3
-    mova       m2, m0
+    punpckhdq  m2, m0, m1
     punpckldq  m0, m1
-    punpckhdq  m2, m1
 
     movq       m4, %5
     movq       m6, %6
@@ -916,9 +915,8 @@ DEBLOCK_LUMA_INTRA
     movq       m7, %8
     punpcklwd  m4, m6
     punpcklwd  m5, m7
-    mova       m6, m4
+    punpckhdq  m6, m4, m5
     punpckldq  m4, m5
-    punpckhdq  m6, m5
 
     punpckhqdq m1, m0, m4
     punpckhqdq m3, m2, m6



More information about the x264-devel mailing list