[x264-devel] x86: Utilize 3-arg instructions in AVX deblock
Henrik Gramner
git at videolan.org
Mon May 22 00:02:14 CEST 2017
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Thu Apr 20 21:58:23 2017 +0200| [aaa9aa83a111ed6f1db253d5afa91c5fc844583f] | committer: Henrik Gramner
x86: Utilize 3-arg instructions in AVX deblock
Avoids some redundant register-register moves.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=aaa9aa83a111ed6f1db253d5afa91c5fc844583f
---
common/x86/deblock-a.asm | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm
index 9790fd26..abebafad 100644
--- a/common/x86/deblock-a.asm
+++ b/common/x86/deblock-a.asm
@@ -906,9 +906,8 @@ DEBLOCK_LUMA_INTRA
movq m3, %4
punpcklwd m0, m2
punpcklwd m1, m3
- mova m2, m0
+ punpckhdq m2, m0, m1
punpckldq m0, m1
- punpckhdq m2, m1
movq m4, %5
movq m6, %6
@@ -916,9 +915,8 @@ DEBLOCK_LUMA_INTRA
movq m7, %8
punpcklwd m4, m6
punpcklwd m5, m7
- mova m6, m4
+ punpckhdq m6, m4, m5
punpckldq m4, m5
- punpckhdq m6, m5
punpckhqdq m1, m0, m4
punpckhqdq m3, m2, m6
More information about the x264-devel mailing list