[x265] [PATCH] asm: removed duplicate constants in intrapred8.asm (8bpp); these constants are already defined in const-a.asm

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Wed May 20 09:23:55 CEST 2015


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1432106560 -19800
#      Wed May 20 12:52:40 2015 +0530
# Node ID e451fccbee625fae6e337892ec7ca6c4e8779490
# Parent  55945e7be95e8fa3006afe0c37894a045daccd4a
asm: removed duplicate constants in intrapred8.asm (8bpp); these constants are already defined in const-a.asm

diff -r 55945e7be95e -r e451fccbee62 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Wed May 20 12:49:59 2015 +0530
+++ b/source/common/x86/intrapred8.asm	Wed May 20 12:52:40 2015 +0530
@@ -664,15 +664,6 @@
 ALIGN 32
 ;; (blkSize - 1 - x)
 pw_planar4_0:         dw 3,  2,  1,  0,  3,  2,  1,  0
-pw_planar4_1:         dw 3,  3,  3,  3,  3,  3,  3,  3
-pw_planar8_0:         dw 7,  6,  5,  4,  3,  2,  1,  0
-pw_planar8_1:         dw 7,  7,  7,  7,  7,  7,  7,  7
-pw_planar16_0:        dw 15, 14, 13, 12, 11, 10, 9,  8
-pw_planar16_1:        dw 15, 15, 15, 15, 15, 15, 15, 15
-pw_planar32_1:        dw 31, 31, 31, 31, 31, 31, 31, 31
-pw_planar32_L:        dw 31, 30, 29, 28, 27, 26, 25, 24
-pw_planar32_H:        dw 23, 22, 21, 20, 19, 18, 17, 16
-
 ALIGN 32
 c_ang8_mode_13:       db 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14
                       db 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28
@@ -712,9 +703,10 @@
 %endrep
 
 SECTION .text
-
 cextern pw_2
+cextern pw_3
 cextern pw_4
+cextern pw_7
 cextern pw_8
 cextern pw_16
 cextern pw_15
@@ -1149,9 +1141,8 @@
     pshufd          m3, m3, 0xAA
     pshufhw         m4, m2, 0               ; bottomLeft
     pshufd          m4, m4, 0xAA
-
     pmullw          m3, [multi_2Row]        ; (x + 1) * topRight
-    pmullw          m0, m1, [pw_planar4_1]  ; (blkSize - 1 - y) * above[x]
+    pmullw          m0, m1, [pw_3]          ; (blkSize - 1 - y) * above[x]
     paddw           m3, [pw_4]
     paddw           m3, m4
     paddw           m3, m0
@@ -1210,9 +1201,8 @@
     pshuflw         m4, m4, 0x00
     pshufd          m3, m3, 0x44
     pshufd          m4, m4, 0x44
-
     pmullw          m3, [multiL]            ; (x + 1) * topRight
-    pmullw          m0, m1, [pw_planar8_1]  ; (blkSize - 1 - y) * above[x]
+    pmullw          m0, m1, [pw_7]          ; (blkSize - 1 - y) * above[x]
     paddw           m3, [pw_8]
     paddw           m3, m4
     paddw           m3, m0
@@ -1226,7 +1216,7 @@
     pshufhw         m5, m2, 0x55 * (%1 - 4)
     pshufd          m5, m5, 0xAA
 %endif
-    pmullw          m5, [pw_planar8_0]
+    pmullw          m5, [pw_planar16_mul + mmsize]
     paddw           m5, m3
     psraw           m5, 4
     packuswb        m5, m5
@@ -1266,11 +1256,10 @@
     pshuflw         m6, m6, 0x00
     pshufd          m3, m3, 0x44                ; v_topRight
     pshufd          m6, m6, 0x44                ; v_bottomLeft
-
     pmullw          m4, m3, [multiH]            ; (x + 1) * topRight
     pmullw          m3, [multiL]                ; (x + 1) * topRight
-    pmullw          m1, m2, [pw_planar16_1]     ; (blkSize - 1 - y) * above[x]
-    pmullw          m5, m7, [pw_planar16_1]     ; (blkSize - 1 - y) * above[x]
+    pmullw          m1, m2, [pw_15]             ; (blkSize - 1 - y) * above[x]
+    pmullw          m5, m7, [pw_15]             ; (blkSize - 1 - y) * above[x]
     paddw           m4, [pw_16]
     paddw           m3, [pw_16]
     paddw           m4, m6
@@ -1308,8 +1297,8 @@
     paddw           m4, m1
     lea             r0, [r0 + r1]
 %endif
-    pmullw          m0, m5, [pw_planar8_0]
-    pmullw          m5, [pw_planar16_0]
+    pmullw          m0, m5, [pw_planar16_mul + mmsize]
+    pmullw          m5, [pw_planar16_mul]
     paddw           m0, m4
     paddw           m5, m3
     psraw           m5, 5
@@ -1368,8 +1357,7 @@
     mova            m8, m11
     mova            m9, m11
     mova            m10, m11
-
-    mova            m12, [pw_planar32_1]
+    mova            m12, [pw_31]
     movh            m4, [r2 + 1]
     punpcklbw       m4, m7
     psubw           m8, m4
@@ -1393,11 +1381,10 @@
     psubw           m11, m4
     pmullw          m4, m12
     paddw           m3, m4
-
-    mova            m12, [pw_planar32_L]
-    mova            m13, [pw_planar32_H]
-    mova            m14, [pw_planar16_0]
-    mova            m15, [pw_planar8_0]
+    mova            m12, [pw_planar32_mul]
+    mova            m13, [pw_planar32_mul + mmsize]
+    mova            m14, [pw_planar16_mul]
+    mova            m15, [pw_planar16_mul + mmsize]
 %macro PROCESS 1
     pmullw          m5, %1, m12
     pmullw          m6, %1, m13
@@ -1480,42 +1467,37 @@
     punpcklbw       m4, m7
     psubw           m5, m6, m4
     mova            [rsp + 0 * mmsize], m5
-    pmullw          m4, [pw_planar32_1]
+    pmullw          m4, [pw_31]
     paddw           m0, m4
-
     movh            m4, [r2 + 9]
     punpcklbw       m4, m7
     psubw           m5, m6, m4
     mova            [rsp + 1 * mmsize], m5
-    pmullw          m4, [pw_planar32_1]
+    pmullw          m4, [pw_31]
     paddw           m1, m4
-
     movh            m4, [r2 + 17]
     punpcklbw       m4, m7
     psubw           m5, m6, m4
     mova            [rsp + 2 * mmsize], m5
-    pmullw          m4, [pw_planar32_1]
+    pmullw          m4, [pw_31]
     paddw           m2, m4
-
     movh            m4, [r2 + 25]
     punpcklbw       m4, m7
     psubw           m5, m6, m4
     mova            [rsp + 3 * mmsize], m5
-    pmullw          m4, [pw_planar32_1]
+    pmullw          m4, [pw_31]
     paddw           m3, m4
-
 %macro PROCESS 1
-    pmullw          m5, %1, [pw_planar32_L]
-    pmullw          m6, %1, [pw_planar32_H]
+    pmullw          m5, %1, [pw_planar32_mul]
+    pmullw          m6, %1, [pw_planar32_mul + mmsize]
     paddw           m5, m0
     paddw           m6, m1
     psraw           m5, 6
     psraw           m6, 6
     packuswb        m5, m6
     movu            [r0], m5
-
-    pmullw          m5, %1, [pw_planar16_0]
-    pmullw          %1, [pw_planar8_0]
+    pmullw          m5, %1, [pw_planar16_mul]
+    pmullw          %1, [pw_planar16_mul + mmsize]
     paddw           m5, m2
     paddw           %1, m3
     psraw           m5, 6
@@ -2474,9 +2456,8 @@
     pshufd          m3, m3, 0xAA
     pshufhw         m4, m2, 0               ; bottomLeft
     pshufd          m4, m4, 0xAA
-
     pmullw          m3, [multi_2Row]        ; (x + 1) * topRight
-    pmullw          m0, m1, [pw_planar4_1]  ; (blkSize - 1 - y) * above[x]
+    pmullw          m0, m1, [pw_3]          ; (blkSize - 1 - y) * above[x]
     mova            m6, [pw_planar4_0]
     paddw           m3, [pw_4]
     paddw           m3, m4
@@ -2533,10 +2514,9 @@
     pshufb          m4, m0
     punpcklbw       m3, m0                  ; v_topRight
     punpcklbw       m4, m0                  ; v_bottomLeft
-
     pmullw          m3, [multiL]            ; (x + 1) * topRight
-    pmullw          m0, m1, [pw_planar8_1]  ; (blkSize - 1 - y) * above[x]
-    mova            m6, [pw_planar8_0]
+    pmullw          m0, m1, [pw_7]          ; (blkSize - 1 - y) * above[x]
+    mova            m6, [pw_planar16_mul + mmsize]
     paddw           m3, [pw_8]
     paddw           m3, m4
     paddw           m3, m0
@@ -2585,11 +2565,10 @@
     pshufb          m6, m0
     punpcklbw       m3, m0                      ; v_topRight
     punpcklbw       m6, m0                      ; v_bottomLeft
-
     pmullw          m4, m3, [multiH]            ; (x + 1) * topRight
     pmullw          m3, [multiL]                ; (x + 1) * topRight
-    pmullw          m1, m2, [pw_planar16_1]     ; (blkSize - 1 - y) * above[x]
-    pmullw          m5, m7, [pw_planar16_1]     ; (blkSize - 1 - y) * above[x]
+    pmullw          m1, m2, [pw_15]             ; (blkSize - 1 - y) * above[x]
+    pmullw          m5, m7, [pw_15]             ; (blkSize - 1 - y) * above[x]
     paddw           m4, [pw_16]
     paddw           m3, [pw_16]
     paddw           m4, m6
@@ -2620,8 +2599,8 @@
 %endif
 %endif
 %endif
-    pmullw          m0, m5, [pw_planar8_0]
-    pmullw          m5, [pw_planar16_0]
+    pmullw          m0, m5, [pw_planar16_mul + mmsize]
+    pmullw          m5, [pw_planar16_mul]
     paddw           m0, m4
     paddw           m5, m3
     paddw           m3, m6
@@ -2738,27 +2717,23 @@
     paddw           m1, [pw_32]
     paddw           m2, [pw_32]
     paddw           m3, [pw_32]
-
     pmovzxbw        m4, [r2 + 1]
-    pmullw          m5, m4, [pw_planar32_1]
+    pmullw          m5, m4, [pw_31]
     paddw           m0, m5
     psubw           m5, m6, m4
     mova            m8, m5
-
     pmovzxbw        m4, [r2 + 9]
-    pmullw          m5, m4, [pw_planar32_1]
+    pmullw          m5, m4, [pw_31]
     paddw           m1, m5
     psubw           m5, m6, m4
     mova            m9, m5
-
     pmovzxbw        m4, [r2 + 17]
-    pmullw          m5, m4, [pw_planar32_1]
+    pmullw          m5, m4, [pw_31]
     paddw           m2, m5
     psubw           m5, m6, m4
     mova            m10, m5
-
     pmovzxbw        m4, [r2 + 25]
-    pmullw          m5, m4, [pw_planar32_1]
+    pmullw          m5, m4, [pw_31]
     paddw           m3, m5
     psubw           m5, m6, m4
     mova            m11, m5
@@ -2768,9 +2743,8 @@
     movd            m4, [r2]
     pshufb          m4, m7
     punpcklbw       m4, m7
-
-    pmullw          m5, m4, [pw_planar32_L]
-    pmullw          m6, m4, [pw_planar32_H]
+    pmullw          m5, m4, [pw_planar32_mul]
+    pmullw          m6, m4, [pw_planar32_mul + mmsize]
     paddw           m5, m0
     paddw           m6, m1
     paddw           m0, m8
@@ -2779,9 +2753,8 @@
     psraw           m6, 6
     packuswb        m5, m6
     movu            [r0], m5
-
-    pmullw          m5, m4, [pw_planar16_0]
-    pmullw          m4, [pw_planar8_0]
+    pmullw          m5, m4, [pw_planar16_mul]
+    pmullw          m4, [pw_planar16_mul + mmsize]
     paddw           m5, m2
     paddw           m4, m3
     paddw           m2, m10


More information about the x265-devel mailing list