[x264-devel] x86inc: Make cpuflag() and notcpuflag() return 0 or 1

Henrik Gramner git at videolan.org
Sun Jan 17 22:17:53 CET 2016


x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Wed Sep 30 23:17:00 2015 +0200| [8017b33454397d59b3285ec6d2ad35b6d0deb58a] | committer: Henrik Gramner

x86inc: Make cpuflag() and notcpuflag() return 0 or 1

Makes it possible to use them in arithmetic expressions.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=8017b33454397d59b3285ec6d2ad35b6d0deb58a
---

 common/x86/deblock-a.asm |    6 +-----
 common/x86/mc-a.asm      |    6 +-----
 common/x86/mc-a2.asm     |    8 ++++----
 common/x86/quant-a.asm   |    9 +--------
 common/x86/x86inc.asm    |    5 +++--
 5 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm
index 2c351c7..a579e6d 100644
--- a/common/x86/deblock-a.asm
+++ b/common/x86/deblock-a.asm
@@ -1266,11 +1266,7 @@ cglobal deblock_h_luma, 5,9,0,0x60+16*WIN64
     lea    r8, [r1*3]
     lea    r6, [r0-4]
     lea    r5, [r0-4+r8]
-%if WIN64
-    %define pix_tmp rsp+0x30 ; shadow space + r4
-%else
-    %define pix_tmp rsp
-%endif
+    %xdefine pix_tmp rsp+0x30*WIN64 ; shadow space + r4
 
     ; transpose 6x16 -> tmp space
     TRANSPOSE6x8_MEM  PASS8ROWS(r6, r5, r1, r8), pix_tmp
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index cd69c82..7ce396f 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -1912,11 +1912,7 @@ ALIGN 4
 
 %macro MC_CHROMA_SSSE3 0
 cglobal mc_chroma
-%if cpuflag(avx2)
-    MC_CHROMA_START 9
-%else
-    MC_CHROMA_START 10
-%endif
+    MC_CHROMA_START 10-cpuflag(avx2)
     and       r5d, 7
     and       t2d, 7
     mov       t0d, r5d
diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 727e9c8..c0a49e6 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -2093,8 +2093,8 @@ MBTREE
 %endmacro
 
 ; FIXME: align loads to 16 bytes
-%macro MBTREE_AVX 1
-cglobal mbtree_propagate_cost, 6,6,%1
+%macro MBTREE_AVX 0
+cglobal mbtree_propagate_cost, 6,6,8-cpuflag(avx2)
     vbroadcastss m6, [r5]
     mov         r5d, r6m
     lea          r0, [r0+r5*2]
@@ -2165,9 +2165,9 @@ cglobal mbtree_propagate_cost, 6,6,%1
 %endmacro
 
 INIT_YMM avx
-MBTREE_AVX 8
+MBTREE_AVX
 INIT_YMM avx2
-MBTREE_AVX 7
+MBTREE_AVX
 
 %macro MBTREE_PROPAGATE_LIST 0
 ;-----------------------------------------------------------------------------
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index 188f3c1..305ae26 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -841,14 +841,7 @@ DEQUANT_DC w, pmullw
 ;-----------------------------------------------------------------------------
 
 %macro OPTIMIZE_CHROMA_2x2_DC 0
-%assign %%regs 5
-%if cpuflag(sse4)
-    %assign %%regs %%regs-1
-%endif
-%if ARCH_X86_64 == 0
-    %assign %%regs %%regs+1      ; t0-t4 are volatile on x86-64
-%endif
-cglobal optimize_chroma_2x2_dc, 0,%%regs,7
+cglobal optimize_chroma_2x2_dc, 0,6-cpuflag(sse4),7
     movifnidn t0, r0mp
     movd      m2, r1m
     movq      m1, [t0]
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index a0076ac..c703e29 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -761,8 +761,9 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
 %assign cpuflags_bmi1     (1<<22)|cpuflags_lzcnt
 %assign cpuflags_bmi2     (1<<23)|cpuflags_bmi1
 
-%define    cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
-%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
+; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
+%define    cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)
+%define notcpuflag(x) (cpuflag(x) ^ 1)
 
 ; Takes an arbitrary number of cpuflags from the above list.
 ; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.



More information about the x264-devel mailing list