[x265] [PATCH 087 of 307] x86: AVX512 cleanup addAvg low bit depth code

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:31:25 CEST 2018


# HG changeset patch
# User Vignesh Vijayakumar
# Date 1502776459 -19800
#      Tue Aug 15 11:24:19 2017 +0530
# Node ID 2fa52ac34d8a8248d183fccfc78393c45a5f0839
# Parent  2db192bac0f14d55f7f82b8964d6c67c3a3637c3
x86: AVX512 cleanup addAvg low bit depth code

diff -r 2db192bac0f1 -r 2fa52ac34d8a source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm	Tue Aug 15 10:32:52 2017 +0530
+++ b/source/common/x86/mc-a.asm	Tue Aug 15 11:24:19 2017 +0530
@@ -46,13 +46,10 @@
     %error Unsupport bit depth!
 %endif
 
-SECTION_RODATA 32
-
-ch_shuf: times 2 db 0,2,2,4,4,6,6,8,1,3,3,5,5,7,7,9
-ch_shuf_adj: times 8 db 0
-             times 8 db 2
-             times 8 db 4
-             times 8 db 6
+SECTION_RODATA 64
+
+ALIGN 64
+const shuf_avx512,  dq 0, 2, 4, 6, 1, 3, 5, 7
 
 SECTION .text
 
@@ -3289,8 +3286,9 @@
 %macro PROCESS_ADDAVG_64x2_AVX512 0
     movu            m0, [r0]
     movu            m1, [r1]
-    movu            m2, [r0 + 64]
-    movu            m3, [r1 + 64]
+    movu            m2, [r0 + mmsize]
+    movu            m3, [r1 + mmsize]
+
     paddw           m0, m1
     pmulhrsw        m0, m4
     paddw           m0, m5
@@ -3299,14 +3297,14 @@
     paddw           m2, m5
 
     packuswb        m0, m2
-    vpermq          m0, m0, 11011000b
-    vshufi64x2      m0, m0, 11011000b
+    vpermq          m0, m6, m0
     movu            [r2], m0
 
     movu            m0, [r0 + r3]
     movu            m1, [r1 + r4]
-    movu            m2, [r0 + r3 + 64]
-    movu            m3, [r1 + r4 + 64]
+    movu            m2, [r0 + r3 + mmsize]
+    movu            m3, [r1 + r4 + mmsize]
+
     paddw           m0, m1
     pmulhrsw        m0, m4
     paddw           m0, m5
@@ -3315,8 +3313,7 @@
     paddw           m2, m5
 
     packuswb        m0, m2
-    vpermq          m0, m0, 11011000b
-    vshufi64x2      m0, m0, 11011000b
+    vpermq          m0, m6, m0
     movu            [r2 + r5], m0
 %endmacro
 
@@ -3325,9 +3322,11 @@
 ;--------------------------------------------------------------------------------------------------------------------
 %macro ADDAVG_W64_AVX512 1
 INIT_ZMM avx512
-cglobal addAvg_64x%1, 6,6,6
+cglobal addAvg_64x%1, 6,6,7
     vbroadcasti32x8 m4, [pw_256]
     vbroadcasti32x8 m5, [pw_128]
+    mova            m6, [shuf_avx512]
+
     add             r3, r3
     add             r4, r4
 


More information about the x265-devel mailing list