[x265] [PATCH 15 of 29] intrapred: fix 32-bit build crash caused by a broken stack in intra_ang8x8

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Tue Jan 13 08:11:23 CET 2015


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1420026015 -19800
#      Wed Dec 31 17:10:15 2014 +0530
# Node ID 38933694dc0880453bf34a8faee783ba0633e5cf
# Parent  9aa0b1574481b9ef18824f33837d9b1f0e554d8c
intrapred: fix 32-bit build crash caused by a broken stack in intra_ang8x8

diff -r 9aa0b1574481 -r 38933694dc08 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Mon Jan 12 12:56:56 2015 +0530
+++ b/source/common/x86/intrapred8.asm	Wed Dec 31 17:10:15 2014 +0530
@@ -1959,7 +1959,7 @@
     movh        [r0],      m0
     RET
 
-cglobal intra_pred_ang8_10_new, 3,5,5
+cglobal intra_pred_ang8_10_new, 3,6,5
     movh        m0,        [r2 + 17]
     mova        m4,        [pb_unpackbq]
     palignr     m1,        m0, 2
@@ -2118,20 +2118,21 @@
     packuswb    m1,        m0
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
 
-cglobal intra_pred_ang8_11_new, 3,6,8
-    mov         r4,        16
-    mov         r5,        0
+cglobal intra_pred_ang8_11_new, 3,5,8
+    xor         r4,        r4
     cmp         r3m,       byte 25
-    cmove       r4,        r5
-    lea         r3,        [ang_table + 23 * 16]
-    mova        m7,        [pw_1024]
-
-    movu        m0,        [r2 + r4]                  ; [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
+    mov         r3,        16
+    cmove       r3,        r4
+
+    movu        m0,        [r2 + r3]                  ; [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
     pinsrb      m0,        [r2], 0
     palignr     m1,        m0, 1                      ; [x 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1]
 
     punpcklbw   m0,        m1                         ; [8 7 7 6 6 5 5 4 4 3 3 2 2 1 1 0]
 
+    lea         r3,        [ang_table + 23 * 16]
+    mova        m7,        [pw_1024]
+
     pmaddubsw   m4,        m0, [r3 + 7 * 16]          ; [30]
     pmulhrsw    m4,        m7
     pmaddubsw   m3,        m0, [r3 + 5 * 16]          ; [28]
@@ -2205,18 +2206,18 @@
     packuswb    m6,        m2
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
 
-cglobal intra_pred_ang8_12_new, 4,6,8
-    mov         r4,        16
-    mov         r5,        0
+cglobal intra_pred_ang8_12_new, 3,5,8
+    xor         r4,        r4
     cmp         r3m,       byte 24
     mov         r3,        16
-    cmove       r4,        r5
-    cmove       r5,        r3
+    jz          .next
+    xchg        r3,        r4
+.next:
 
     movu        m1,        [r2 + r4]                  ; [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
     pinsrb      m1,        [r2], 0
     pslldq      m0,        m1, 1                      ; [14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 a]
-    pinsrb      m0,        [r2 + r5 + 6], 0
+    pinsrb      m0,        [r2 + r3 + 6], 0
 
     lea         r4,        [ang_table + 22 * 16]
     mova        m7,        [pw_1024]
@@ -2303,20 +2304,20 @@
     packuswb    m1,        m0
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
 
-cglobal intra_pred_ang8_13_new, 4,6,8
-    mov         r4,        16
-    mov         r5,        0
+cglobal intra_pred_ang8_13_new, 4,5,8
+    xor         r4,        r4
     cmp         r3m,       byte 23
     mov         r3,        16
-    cmove       r4,        r5
-    cmove       r5,        r3
+    jz          .next
+    xchg        r3,        r4
+.next:
 
     movu        m1,        [r2 +  r4]                 ; [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
     pinsrb      m1,        [r2], 0
     pslldq      m1,        1                          ; [14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 a]
-    pinsrb      m1,        [r2 + r5 + 4], 0
+    pinsrb      m1,        [r2 + r3 + 4], 0
     pslldq      m0,        m1, 1                      ; [13 12 11 10 9 8 7 6 5 4 3 2 1 0 a b]
-    pinsrb      m0,        [r2 + r5 + 7], 0
+    pinsrb      m0,        [r2 + r3 + 7], 0
     punpckhbw   m5,        m0, m1                     ; [14 13 13 12 12 11 11 10 10 9 9 8 8 7 7 6]
     punpcklbw   m0,        m1                         ; [6 5 5 4 4 3 3 2 2 1 1 0 0 a a b]
     palignr     m1,        m5, m0, 2                  ; [7 6 6 5 5 4 4 3 3 2 2 1 1 0 0 a]
@@ -2405,20 +2406,20 @@
     packuswb    m1,        m0
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
 
-cglobal intra_pred_ang8_14_new, 4,6,8
-    mov         r4,        16
-    mov         r5,        0
+cglobal intra_pred_ang8_14_new, 4,5,8
+    xor         r4,        r4
     cmp         r3m,       byte 22
     mov         r3,        16
-    cmove       r4,        r5
-    cmove       r5,        r3
+    jz          .next
+    xchg        r3,        r4
+.next:
 
     movu        m1,        [r2 + r4 - 2]              ; [13 12 11 10 9 8 7 6 5 4 3 2 1 0 a b]
     pinsrb      m1,        [r2], 2
-    pinsrb      m1,        [r2 + r5 + 2], 1
-    pinsrb      m1,        [r2 + r5 + 5], 0
+    pinsrb      m1,        [r2 + r3 + 2], 1
+    pinsrb      m1,        [r2 + r3 + 5], 0
     pslldq      m0,        m1, 1                      ; [12 11 10 9 8 7 6 5 4 3 2 1 0 a b c]
-    pinsrb      m0,        [r2 + r5 + 7], 0
+    pinsrb      m0,        [r2 + r3 + 7], 0
     punpckhbw   m2,        m0, m1                     ; [13 12 12 11 11 10 10 9 9 8 8 7 7 6 6 5]
     punpcklbw   m0,        m1                         ; [5 4 4 3 3 2 2 1 1 0 0 a a b b c]
     palignr     m1,        m2, m0, 2                  ; [6 5 5 4 4 3 3 2 2 1 1 0 0 a a b]
@@ -2510,21 +2511,21 @@
     packuswb    m1,        m0
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
 
-cglobal intra_pred_ang8_15_new, 4,6,8
-    mov         r4,        16
-    mov         r5,        0
+cglobal intra_pred_ang8_15_new, 4,5,8
+    xor         r4,        r4
     cmp         r3m,       byte 21
     mov         r3,        16
-    cmove       r4,        r5
-    cmove       r5,        r3
+    jz          .next
+    xchg        r3,        r4
+.next:
 
     movu        m1,        [r2 + r4]                  ; [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
     pinsrb      m1,        [r2], 0
-    movu        m2,        [r2 + r5]
+    movu        m2,        [r2 + r3]
     pshufb      m2,        [c_mode16_15]
     palignr     m1,        m2, 13                     ; [12 11 10 9 8 7 6 5 4 3 2 1 0 a b c]
     pslldq      m0,        m1, 1                      ; [11 10 9 8 7 6 5 4 3 2 1 0 a b c d]
-    pinsrb      m0,        [r2 + r5 + 8], 0
+    pinsrb      m0,        [r2 + r3 + 8], 0
     punpckhbw   m4,        m0, m1                     ; [12 11 11 10 10 9 9 8 8 7 7 6 6 5 5 4]
     punpcklbw   m0,        m1                         ; [4 3 3 2 2 1 1 0 0 a a b b c c d]
     palignr     m1,        m4, m0, 2                  ; [5 4 4 3 3 2 2 1 1 0 0 a a b b c]
@@ -2620,21 +2621,21 @@
     packuswb    m1,        m0
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
 
-cglobal intra_pred_ang8_16_new, 4,6,8
-    mov         r4,        16
-    mov         r5,        0
+cglobal intra_pred_ang8_16_new, 4,5,8
+    xor         r4,        r4
     cmp         r3m,       byte 20
     mov         r3,        16
-    cmove       r4,        r5
-    cmove       r5,        r3
+    jz          .next
+    xchg        r3,        r4
+.next:
 
     movu        m1,        [r2 + r4]                  ; [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
     pinsrb      m1,        [r2], 0
-    movu        m2,        [r2 + r5]
+    movu        m2,        [r2 + r3]
     pshufb      m2,        [c_mode16_16]
     palignr     m1,        m2, 12                     ; [11 10 9 8 7 6 5 4 3 2 1 0 a b c d]
     pslldq      m0,        m1, 1                      ; [10 9 8 7 6 5 4 3 2 1 0 a b c d e]
-    pinsrb      m0,        [r2 + r5 + 8], 0
+    pinsrb      m0,        [r2 + r3 + 8], 0
     punpckhbw   m4,        m0, m1                     ; [11 10 10 9 9 8 8 7 7 6 6 5 5 4 4 3]
     punpcklbw   m0,        m1                         ; [3 2 2 1 1 0 0 a a b b c c d d e]
     palignr     m1,        m4, m0, 2                  ; [4 3 3 2 2 1 1 0 0 a a b b c c d]
@@ -2734,21 +2735,21 @@
     packuswb    m1,        m0
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
 
-cglobal intra_pred_ang8_17_new, 4,6,8
-    mov         r4,        16
-    mov         r5,        0
+cglobal intra_pred_ang8_17_new, 4,5,8
+    xor         r4,        r4
     cmp         r3m,       byte 19
     mov         r3,        16
-    cmove       r4,        r5
-    cmove       r5,        r3
+    jz          .next
+    xchg        r3,        r4
+.next:
 
     movu        m2,        [r2 + r4]                  ; [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
     pinsrb      m2,        [r2], 0
-    movu        m1,        [r2 + r5]
+    movu        m1,        [r2 + r3]
     pshufb      m1,        [c_mode16_17]
     palignr     m2,        m1, 11                     ; [10 9 8 7 6 5 4 3 2 1 0 a b c d e]
     pslldq      m0,        m2, 1                      ; [9 8 7 6 5 4 3 2 1 0 a b c d e f]
-    pinsrb      m0,        [r2 + r5 + 7], 0
+    pinsrb      m0,        [r2 + r3 + 7], 0
     punpckhbw   m1,        m0, m2                     ; [10 9 9 8 8 7 7 6 6 5 5 4 4 3 3 2]
     punpcklbw   m0,        m2                         ; [2 1 1 0 0 a a b b c c d d e e f]
 


More information about the x265-devel mailing list