[x264-devel] x86inc: Make cpuflag() and notcpuflag() return 0 or 1
Henrik Gramner
git at videolan.org
Sun Jan 17 22:17:53 CET 2016
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Wed Sep 30 23:17:00 2015 +0200| [8017b33454397d59b3285ec6d2ad35b6d0deb58a] | committer: Henrik Gramner
x86inc: Make cpuflag() and notcpuflag() return 0 or 1
Makes it possible to use them in arithmetic expressions.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=8017b33454397d59b3285ec6d2ad35b6d0deb58a
---
common/x86/deblock-a.asm | 6 +-----
common/x86/mc-a.asm | 6 +-----
common/x86/mc-a2.asm | 8 ++++----
common/x86/quant-a.asm | 9 +--------
common/x86/x86inc.asm | 5 +++--
5 files changed, 10 insertions(+), 24 deletions(-)
diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm
index 2c351c7..a579e6d 100644
--- a/common/x86/deblock-a.asm
+++ b/common/x86/deblock-a.asm
@@ -1266,11 +1266,7 @@ cglobal deblock_h_luma, 5,9,0,0x60+16*WIN64
lea r8, [r1*3]
lea r6, [r0-4]
lea r5, [r0-4+r8]
-%if WIN64
- %define pix_tmp rsp+0x30 ; shadow space + r4
-%else
- %define pix_tmp rsp
-%endif
+ %xdefine pix_tmp rsp+0x30*WIN64 ; shadow space + r4
; transpose 6x16 -> tmp space
TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r1, r8), pix_tmp
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index cd69c82..7ce396f 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -1912,11 +1912,7 @@ ALIGN 4
%macro MC_CHROMA_SSSE3 0
cglobal mc_chroma
-%if cpuflag(avx2)
- MC_CHROMA_START 9
-%else
- MC_CHROMA_START 10
-%endif
+ MC_CHROMA_START 10-cpuflag(avx2)
and r5d, 7
and t2d, 7
mov t0d, r5d
diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 727e9c8..c0a49e6 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -2093,8 +2093,8 @@ MBTREE
%endmacro
; FIXME: align loads to 16 bytes
-%macro MBTREE_AVX 1
-cglobal mbtree_propagate_cost, 6,6,%1
+%macro MBTREE_AVX 0
+cglobal mbtree_propagate_cost, 6,6,8-cpuflag(avx2)
vbroadcastss m6, [r5]
mov r5d, r6m
lea r0, [r0+r5*2]
@@ -2165,9 +2165,9 @@ cglobal mbtree_propagate_cost, 6,6,%1
%endmacro
INIT_YMM avx
-MBTREE_AVX 8
+MBTREE_AVX
INIT_YMM avx2
-MBTREE_AVX 7
+MBTREE_AVX
%macro MBTREE_PROPAGATE_LIST 0
;-----------------------------------------------------------------------------
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index 188f3c1..305ae26 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -841,14 +841,7 @@ DEQUANT_DC w, pmullw
;-----------------------------------------------------------------------------
%macro OPTIMIZE_CHROMA_2x2_DC 0
-%assign %%regs 5
-%if cpuflag(sse4)
- %assign %%regs %%regs-1
-%endif
-%if ARCH_X86_64 == 0
- %assign %%regs %%regs+1 ; t0-t4 are volatile on x86-64
-%endif
-cglobal optimize_chroma_2x2_dc, 0,%%regs,7
+cglobal optimize_chroma_2x2_dc, 0,6-cpuflag(sse4),7
movifnidn t0, r0mp
movd m2, r1m
movq m1, [t0]
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index a0076ac..c703e29 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -761,8 +761,9 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%assign cpuflags_bmi1 (1<<22)|cpuflags_lzcnt
%assign cpuflags_bmi2 (1<<23)|cpuflags_bmi1
-%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
-%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
+; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
+%define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)
+%define notcpuflag(x) (cpuflag(x) ^ 1)
; Takes an arbitrary number of cpuflags from the above list.
; All subsequent functions (up to the next INIT_CPUFLAGS) are built for the specified cpu.
More information about the x264-devel
mailing list