[x265] [PATCH 306 of 307] x86: AVX512 fix intra_pred_ang16_mode_8_and_28 kernel placement

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:35:04 CEST 2018


# HG changeset patch
# User Jayashri Murugan <jayashri at multicorewareinc.com>
# Date 1517855940 28800
#      Mon Feb 05 10:39:00 2018 -0800
# Node ID 876b6e006f2080072c0684dbf75e7cfde974ba79
# Parent  b80e844209ecd0abc896df94306a5ef96b27b918
x86: AVX512 fix intra_pred_ang16_mode_8_and_28 kernel placement

diff -r b80e844209ec -r 876b6e006f20 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Tue Jan 30 12:13:46 2018 +0530
+++ b/source/common/x86/intrapred16.asm	Mon Feb 05 10:39:00 2018 -0800
@@ -11843,28 +11843,6 @@
     packusdw        m11, m3
     TRANSPOSE_STORE_AVX2 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 16
     ret
-cglobal intra_pred_ang16_8, 3,7,16
-    add         r2,        64
-    xor         r6d,       r6d
-    lea         r3,        [ang_table_avx2 + 15 * 32]
-    add         r1d,       r1d
-    lea         r4,        [r1 * 3]
-    vbroadcasti32x8  m15,  [pd_16]
-
-    call        ang16_mode_8_28
-    RET
-
-cglobal intra_pred_ang16_28, 3,7,16
-    xor         r6d,       r6d
-    inc         r6d
-    lea         r3,        [ang_table_avx2 + 15 * 32]
-    add         r1d,       r1d
-    lea         r4,        [r1 * 3]
-    vbroadcasti32x8  m15,  [pd_16]
-
-    call        ang16_mode_8_28
-    RET
-
 ;; angle 16, modes 7 and 29
 cglobal ang16_mode_7_29
     test            r6d, r6d
@@ -20312,9 +20290,31 @@
     call        ang16_mode_8_28
 
     add         r2,        4
-
     call        ang32_mode_8_28
     RET
+
+cglobal intra_pred_ang16_8, 3,7,16      ; mode-8 entry point; x86inc cglobal: 3 args, 7 GPRs, 16 vector regs
+    add         r2,        64           ; advance r2 by 64 bytes; intra_pred_ang16_28 uses r2 unadjusted
+    xor         r6d,       r6d          ; r6d = 0 here (intra_pred_ang16_28 sets 1); presumably the flag the shared helper tests - confirm
+    lea         r3,        [ang_table_avx2 + 15 * 32] ; r3 = ang_table_avx2 + 480 bytes
+    add         r1d,       r1d          ; r1 *= 2; presumably stride in samples -> bytes for 16-bit pixels - confirm
+    lea         r4,        [r1 * 3]     ; r4 = 3 * r1 (the doubled value)
+    vbroadcasti32x8  m15,  [pd_16]      ; replicate the 256 bits at pd_16 across m15
+
+    call        ang16_mode_8_28         ; shared body for modes 8 and 28 (defined elsewhere in the file)
+    RET
+
+cglobal intra_pred_ang16_28, 3,7,16     ; mode-28 entry point; x86inc cglobal: 3 args, 7 GPRs, 16 vector regs
+    xor         r6d,       r6d          ; clear r6d ...
+    inc         r6d                     ; ... then r6d = 1 (intra_pred_ang16_8 passes 0); presumably the flag the shared helper tests - confirm
+    lea         r3,        [ang_table_avx2 + 15 * 32] ; r3 = ang_table_avx2 + 480 bytes
+    add         r1d,       r1d          ; r1 *= 2; presumably stride in samples -> bytes for 16-bit pixels - confirm
+    lea         r4,        [r1 * 3]     ; r4 = 3 * r1 (the doubled value)
+    vbroadcasti32x8  m15,  [pd_16]      ; replicate the 256 bits at pd_16 across m15
+
+    call        ang16_mode_8_28         ; shared body for modes 8 and 28 (defined elsewhere in the file)
+    RET
+
 ;; angle 16, modes 7 and 29
 cglobal ang16_mode_7_29
     test            r6d, r6d


More information about the x265-devel mailing list