[x265] [PATCH] asm: remove macro vpbroadcastd and fix all invalid uses of vpbroadcastd
Min Chen
chenm003 at 163.com
Tue Aug 18 01:07:38 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1439847554 25200
# Node ID fd80d97fb195c0c9e870ad47a5d5f925083011ce
# Parent 996ebce8c874fc511d495cee227d24413e99d0c1
asm: remove macro vpbroadcastd and fix all invalid uses of vpbroadcastd
---
source/common/x86/pixel-util8.asm | 35 ++++++++++++++++++++++++++++-------
source/common/x86/x86inc.asm | 10 ----------
2 files changed, 28 insertions(+), 17 deletions(-)
diff -r 996ebce8c874 -r fd80d97fb195 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Mon Aug 17 10:52:15 2015 +0530
+++ b/source/common/x86/pixel-util8.asm Mon Aug 17 14:39:14 2015 -0700
@@ -633,7 +633,12 @@
movd xm6, r4d ; m6 = qbits8
; fill offset
+%if UNIX64 == 0
vpbroadcastd m5, r5m ; m5 = add
+%else ; UNIX64 (e.g. Mac): r5m is a register here, so go through movd first
+ movd xm5, r5m
+ vpbroadcastd m5, xm5 ; m5 = add
+%endif
lea r5, [pw_1]
@@ -705,7 +710,12 @@
movd xm6, r4d ; m6 = qbits8
; fill offset
- vpbroadcastd m5, r5m ; m5 = ad
+%if UNIX64 == 0
+ vpbroadcastd m5, r5m ; m5 = add
+%else ; UNIX64 (e.g. Mac): r5m is a register here, so go through movd first
+ movd xm5, r5m
+ vpbroadcastd m5, xm5 ; m5 = add
+%endif
lea r5, [pd_1]
@@ -823,7 +833,12 @@
INIT_YMM avx2
cglobal nquant, 3,5,7
+%if UNIX64 == 0
vpbroadcastd m4, r4m
+%else ; UNIX64 (e.g. Mac): r4m is a register here, so go through movd first
+ movd xm4, r4m
+ vpbroadcastd m4, xm4
+%endif
vpbroadcastd m6, [pw_1]
mov r4d, r5m
pxor m5, m5 ; m7 = numZero
@@ -1055,7 +1070,9 @@
movd xm0, r4d ; m0 = shift
add r4d, -1+16
bts r3d, r4d
- vpbroadcastd m1, r3d ; m1 = dword [add scale]
+
+ movd xm1, r3d
+ vpbroadcastd m1, xm1 ; m1 = dword [add scale]
; m0 = shift
; m1 = scale
@@ -1412,7 +1429,8 @@
shl r6d, 16 - correction
or r6d, r5d ; assuming both w0 and round are using maximum of 16 bits each.
- vpbroadcastd m0, r6d
+ movd xm0, r6d
+ vpbroadcastd m0, xm0
mov r5d, r7m
sub r5d, correction
@@ -1466,7 +1484,8 @@
shl r6d, 16
or r6d, r5d ; assuming both (w0<<6) and round are using maximum of 16 bits each.
- vpbroadcastd m0, r6d
+ movd xm0, r6d
+ vpbroadcastd m0, xm0
movd xm1, r7m
vpbroadcastd m2, r8m
@@ -1677,7 +1696,8 @@
mov r6d, r7m
shl r6d, 16
or r6d, r6m
- vpbroadcastd m3, r6d ; m3 = [round w0]
+ movd xm3, r6d
+ vpbroadcastd m3, xm3 ; m3 = [round w0]
movd xm4, r8m ; m4 = [shift]
vpbroadcastd m5, r9m ; m5 = [offset]
@@ -1793,8 +1813,9 @@
mov r7d, r7m
shl r7d, 16
or r7d, r6m
- vpbroadcastd m0, r7d ; m0 = times 8 dw w0, round
- movd xm1, r8m ; m1 = [shift]
+ movd xm0, r7d
+ vpbroadcastd m0, xm0 ; m0 = times 8 dw w0, round
+ movd xm1, r8m ; m1 = [shift]
vpbroadcastd m2, r9m ; m2 = times 16 dw offset
vpbroadcastw m3, [pw_1]
vpbroadcastw m4, [pw_2000]
diff -r 996ebce8c874 -r fd80d97fb195 source/common/x86/x86inc.asm
--- a/source/common/x86/x86inc.asm Mon Aug 17 10:52:15 2015 +0530
+++ b/source/common/x86/x86inc.asm Mon Aug 17 14:39:14 2015 -0700
@@ -1483,13 +1483,3 @@
%endif
%endmacro
%endif
-
-; workaround: vpbroadcastd with register, the yasm will generate wrong code
-%macro vpbroadcastd 2
- %ifid %2
- movd %1 %+ xmm, %2
- vpbroadcastd %1, %1 %+ xmm
- %else
- vpbroadcastd %1, %2
- %endif
-%endmacro
More information about the x265-devel
mailing list