[x264-devel] commit: Some cosmetics/cleanup (Jason Garrett-Glaser)
git version control
git at videolan.org
Sun May 10 10:16:42 CEST 2009
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Sun Apr 26 22:13:17 2009 -0700| [ecae94a0ee6689bec9d32d64cce64cab7790a984] | committer: Jason Garrett-Glaser
Some cosmetics/cleanup
Move some macros to x86util.asm that should have been there to begin with.
Fix a typo in encoder.c (a stray semicolon after an if condition) that didn't cause any issues.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=ecae94a0ee6689bec9d32d64cce64cab7790a984
---
common/x86/pixel-a.asm | 56 ------------------------------------------------
common/x86/x86util.asm | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
encoder/encoder.c | 2 +-
3 files changed, 57 insertions(+), 57 deletions(-)
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index bc7e00f..6a19c40 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -391,64 +391,8 @@ cglobal x264_pixel_var_8x8_sse2, 2,4,8
; SATD
;=============================================================================
-%macro TRANS_SSE2 5-6
-; TRANSPOSE2x2
-; %1: transpose width (d/q) - use SBUTTERFLY qdq for dq
-; %2: ord/unord (for compat with sse4, unused)
-; %3/%4: source regs
-; %5/%6: tmp regs
-%ifidn %1, d
-%define mask [mask_10 GLOBAL]
-%define shift 16
-%elifidn %1, q
-%define mask [mask_1100 GLOBAL]
-%define shift 32
-%endif
-%if %0==6 ; less dependency if we have two tmp
- mova m%5, mask ; ff00
- mova m%6, m%4 ; x5x4
- psll%1 m%4, shift ; x4..
- pand m%6, m%5 ; x5..
- pandn m%5, m%3 ; ..x0
- psrl%1 m%3, shift ; ..x1
- por m%4, m%5 ; x4x0
- por m%3, m%6 ; x5x1
-%else ; more dependency, one insn less. sometimes faster, sometimes not
- mova m%5, m%4 ; x5x4
- psll%1 m%4, shift ; x4..
- pxor m%4, m%3 ; (x4^x1)x0
- pand m%4, mask ; (x4^x1)..
- pxor m%3, m%4 ; x4x0
- psrl%1 m%4, shift ; ..(x1^x4)
- pxor m%5, m%4 ; x5x1
- SWAP %4, %3, %5
-%endif
-%endmacro
-
%define TRANS TRANS_SSE2
-%macro TRANS_SSE4 5-6 ; see above
-%ifidn %1, d
- mova m%5, m%3
-%ifidn %2, ord
- psrl%1 m%3, 16
-%endif
- pblendw m%3, m%4, 10101010b
- psll%1 m%4, 16
-%ifidn %2, ord
- pblendw m%4, m%5, 01010101b
-%else
- psrl%1 m%5, 16
- por m%4, m%5
-%endif
-%elifidn %1, q
- mova m%5, m%3
- shufps m%3, m%4, 10001000b
- shufps m%5, m%4, 11011101b
- SWAP %4, %5
-%endif
-%endmacro
-
%macro JDUP_SSE2 2
punpckldq %1, %2
; doesn't need to dup. sse2 does things by zero extending to words and full h_2d
diff --git a/common/x86/x86util.asm b/common/x86/x86util.asm
index 8bfe552..cfd7767 100644
--- a/common/x86/x86util.asm
+++ b/common/x86/x86util.asm
@@ -222,6 +222,62 @@
SUMSUB_BADC %3, %7, %4, %8
%endmacro
+%macro TRANS_SSE2 5-6
+; TRANSPOSE2x2
+; %1: transpose width (d/q) - use SBUTTERFLY qdq for dq
+; %2: ord/unord (for compat with sse4, unused)
+; %3/%4: source regs
+; %5/%6: tmp regs
+%ifidn %1, d
+%define mask [mask_10 GLOBAL]
+%define shift 16
+%elifidn %1, q
+%define mask [mask_1100 GLOBAL]
+%define shift 32
+%endif
+%if %0==6 ; less dependency if we have two tmp
+ mova m%5, mask ; ff00
+ mova m%6, m%4 ; x5x4
+ psll%1 m%4, shift ; x4..
+ pand m%6, m%5 ; x5..
+ pandn m%5, m%3 ; ..x0
+ psrl%1 m%3, shift ; ..x1
+ por m%4, m%5 ; x4x0
+ por m%3, m%6 ; x5x1
+%else ; more dependency, one insn less. sometimes faster, sometimes not
+ mova m%5, m%4 ; x5x4
+ psll%1 m%4, shift ; x4..
+ pxor m%4, m%3 ; (x4^x1)x0
+ pand m%4, mask ; (x4^x1)..
+ pxor m%3, m%4 ; x4x0
+ psrl%1 m%4, shift ; ..(x1^x4)
+ pxor m%5, m%4 ; x5x1
+ SWAP %4, %3, %5
+%endif
+%endmacro
+
+%macro TRANS_SSE4 5-6 ; see above
+%ifidn %1, d
+ mova m%5, m%3
+%ifidn %2, ord
+ psrl%1 m%3, 16
+%endif
+ pblendw m%3, m%4, 10101010b
+ psll%1 m%4, 16
+%ifidn %2, ord
+ pblendw m%4, m%5, 01010101b
+%else
+ psrl%1 m%5, 16
+ por m%4, m%5
+%endif
+%elifidn %1, q
+ mova m%5, m%3
+ shufps m%3, m%4, 10001000b
+ shufps m%5, m%4, 11011101b
+ SWAP %4, %5
+%endif
+%endmacro
+
%macro HADAMARD 5-6
; %1=distance in words (0 for vertical pass, 1/2/4 for horizontal passes)
; %2=sumsub/max/amax (sum and diff / maximum / maximum of absolutes)
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 6c0fd34..76651c4 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -728,7 +728,7 @@ x264_t *x264_encoder_open ( x264_param_t *param )
x264_predict_8x8c_init( h->param.cpu, h->predict_8x8c );
x264_predict_8x8_init( h->param.cpu, h->predict_8x8, &h->predict_8x8_filter );
x264_predict_4x4_init( h->param.cpu, h->predict_4x4 );
- if( !h->param.b_cabac );
+ if( !h->param.b_cabac )
x264_init_vlc_tables();
x264_pixel_init( h->param.cpu, &h->pixf );
x264_dct_init( h->param.cpu, &h->dctf );
More information about the x264-devel mailing list