[x265] [PATCH] asm: fix Intrapred_ang[32x32] mode 10 and 26 failure on Mac

Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
Thu Feb 6 12:13:24 CET 2014


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1391685162 -19800
#      Thu Feb 06 16:42:42 2014 +0530
# Node ID 7100c9c940088d416cc9918606eff9d3c5800cad
# Parent  ffe13a5eccb9d0b2950918bdbadcef50b26fe24a
asm: fix Intrapred_ang[32x32] mode 10 and 26 failure on Mac

diff -r ffe13a5eccb9 -r 7100c9c94008 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Thu Feb 06 12:28:32 2014 +0530
+++ b/source/common/x86/intrapred8.asm	Thu Feb 06 16:42:42 2014 +0530
@@ -3734,10 +3734,18 @@
 ; void intraPredAng32_10(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
 ;------------------------------------------------------------------------------------------------------------------
 INIT_XMM sse4
-cglobal intra_pred_ang32_10, 5,7,8
+cglobal intra_pred_ang32_10, 6,7,8,0-(2*mmsize)
+%define m8 [rsp + 0 * mmsize]
+%define m9 [rsp + 1 * mmsize]
     lea         r4, [r1 * 3]
     pxor        m7, m7
     mov         r6, 2
+    movu        m0, [r3]
+    movu        m1, [r3 + 1]
+    mova        m8, m0
+    mova        m9, m1
+    mov         r3d, r5d
+
 .loop:
     movu        m0, [r2 + 1]
     palignr     m1, m0, 1
@@ -3811,13 +3819,13 @@
     movu        [r5 + r4 + 16], m3
 
 ; filter
-    cmp         r5m, byte 0
+    cmp         r3d, byte 0
     jz         .quit
     movhlps     m1, m0
     pmovzxbw    m0, m0
     mova        m1, m0
-    movu        m2, [r3]
-    movu        m3, [r3 + 1]
+    movu        m2, m8
+    movu        m3, m9
 
     pshufb      m2, m7
     pmovzxbw    m2, m2
@@ -5433,9 +5441,17 @@
 ; void intraPredAng32_26(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
 ;------------------------------------------------------------------------------------------------------------------
 INIT_XMM sse4
-cglobal intra_pred_ang32_26, 4,7,7
+cglobal intra_pred_ang32_26, 6,7,7,0-(2*mmsize)
+%define m8 [rsp + 0 * mmsize]
+%define m9 [rsp + 1 * mmsize]
     lea         r4,             [r1 * 3]
     mov         r6,             2
+    movu        m0,             [r2]
+    movu        m1,             [r2 + 1]
+    mova        m8,             m0
+    mova        m9,             m1
+    mov         r2d,            r5d
+
 .loop:
     movu        m0,             [r3 + 1]
 
@@ -5495,15 +5511,15 @@
     movu        [r5 + r4],      m0
 
 ; filter
-    cmp         r5m, byte 0
+    cmp         r2d, byte 0
     jz         .quit
 
     pxor        m4,        m4
     pshufb      m0,        m4
     pmovzxbw    m0,        m0
     mova        m1,        m0
-    movu        m2,        [r2]
-    movu        m3,        [r2 + 1]
+    movu        m2,        m8
+    movu        m3,        m9
 
     pshufb      m2,        m4
     pmovzxbw    m2,        m2


More information about the x265-devel mailing list