[x265] [PATCH] asm: remove the vpbroadcastd macro and fix all invalid uses of vpbroadcastd

Min Chen chenm003 at 163.com
Tue Aug 18 01:07:38 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1439847554 25200
# Node ID fd80d97fb195c0c9e870ad47a5d5f925083011ce
# Parent  996ebce8c874fc511d495cee227d24413e99d0c1
asm: remove the vpbroadcastd macro and fix all invalid uses of vpbroadcastd
---
 source/common/x86/pixel-util8.asm |   35 ++++++++++++++++++++++++++++-------
 source/common/x86/x86inc.asm      |   10 ----------
 2 files changed, 28 insertions(+), 17 deletions(-)

diff -r 996ebce8c874 -r fd80d97fb195 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Mon Aug 17 10:52:15 2015 +0530
+++ b/source/common/x86/pixel-util8.asm	Mon Aug 17 14:39:14 2015 -0700
@@ -633,7 +633,12 @@
     movd            xm6, r4d            ; m6 = qbits8
 
     ; fill offset
+%if UNIX64 == 0
     vpbroadcastd    m5, r5m             ; m5 = add
+%else ; Mac
+    movd           xm5, r5m
+    vpbroadcastd    m5, xm5             ; m5 = add
+%endif
 
     lea             r5, [pw_1]
 
@@ -705,7 +710,12 @@
     movd            xm6, r4d        ; m6 = qbits8
 
     ; fill offset
-    vpbroadcastd    m5, r5m         ; m5 = ad
+%if UNIX64 == 0
+    vpbroadcastd    m5, r5m         ; m5 = add
+%else ; Mac
+    movd           xm5, r5m
+    vpbroadcastd    m5, xm5         ; m5 = add
+%endif
 
     lea             r5, [pd_1]
 
@@ -823,7 +833,12 @@
 
 INIT_YMM avx2
 cglobal nquant, 3,5,7
+%if UNIX64 == 0
     vpbroadcastd m4, r4m
+%else ; Mac
+    movd         xm4, r4m
+    vpbroadcastd m4, xm4
+%endif
     vpbroadcastd m6, [pw_1]
     mov         r4d, r5m
     pxor        m5, m5              ; m7 = numZero
@@ -1055,7 +1070,9 @@
     movd            xm0, r4d            ; m0 = shift
     add             r4d, -1+16
     bts             r3d, r4d
-    vpbroadcastd    m1, r3d             ; m1 = dword [add scale]
+
+    movd            xm1, r3d
+    vpbroadcastd    m1, xm1             ; m1 = dword [add scale]
 
     ; m0 = shift
     ; m1 = scale
@@ -1412,7 +1429,8 @@
     shl          r6d, 16 - correction
     or           r6d, r5d          ; assuming both w0 and round are using maximum of 16 bits each.
 
-    vpbroadcastd m0, r6d
+    movd         xm0, r6d
+    vpbroadcastd m0, xm0
 
     mov          r5d, r7m
     sub          r5d, correction
@@ -1466,7 +1484,8 @@
     shl          r6d, 16
     or           r6d, r5d          ; assuming both (w0<<6) and round are using maximum of 16 bits each.
 
-    vpbroadcastd m0, r6d
+    movd         xm0, r6d
+    vpbroadcastd m0, xm0
 
     movd         xm1, r7m
     vpbroadcastd m2, r8m
@@ -1677,7 +1696,8 @@
     mov                       r6d, r7m
     shl                       r6d, 16
     or                        r6d, r6m
-    vpbroadcastd              m3, r6d      ; m3 = [round w0]
+    movd                      xm3, r6d
+    vpbroadcastd              m3, xm3      ; m3 = [round w0]
     movd                      xm4, r8m     ; m4 = [shift]
     vpbroadcastd              m5, r9m      ; m5 = [offset]
 
@@ -1793,8 +1813,9 @@
     mov             r7d, r7m
     shl             r7d, 16
     or              r7d, r6m
-    vpbroadcastd    m0, r7d            ; m0 = times 8 dw w0, round
-    movd            xm1, r8m            ; m1 = [shift]
+    movd            xm0, r7d
+    vpbroadcastd    m0, xm0            ; m0 = times 8 dw w0, round
+    movd            xm1, r8m           ; m1 = [shift]
     vpbroadcastd    m2, r9m            ; m2 = times 16 dw offset
     vpbroadcastw    m3, [pw_1]
     vpbroadcastw    m4, [pw_2000]
diff -r 996ebce8c874 -r fd80d97fb195 source/common/x86/x86inc.asm
--- a/source/common/x86/x86inc.asm	Mon Aug 17 10:52:15 2015 +0530
+++ b/source/common/x86/x86inc.asm	Mon Aug 17 14:39:14 2015 -0700
@@ -1483,13 +1483,3 @@
 %endif
 %endmacro
 %endif
-
-; workaround: vpbroadcastd with register, the yasm will generate wrong code
-%macro vpbroadcastd 2
-  %ifid %2
-    movd         %1 %+ xmm, %2
-    vpbroadcastd %1, %1 %+ xmm
-  %else
-    vpbroadcastd %1, %2
-  %endif
-%endmacro



More information about the x265-devel mailing list