[x264-devel] Minor asm changes
Jason Garrett-Glaser
git at videolan.org
Wed Mar 7 03:20:16 CET 2012
x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Tue Feb 14 15:07:10 2012 -0800| [ac31c59a98c6c690894670b9c9af2612f799d85b] | committer: Jason Garrett-Glaser
Minor asm changes
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=ac31c59a98c6c690894670b9c9af2612f799d85b
---
common/x86/deblock-a.asm | 11 +++++------
common/x86/quant-a.asm | 16 ++++++++--------
2 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm
index c452e86..d03a9de 100644
--- a/common/x86/deblock-a.asm
+++ b/common/x86/deblock-a.asm
@@ -2218,10 +2218,9 @@ cglobal deblock_h_chroma_intra_mbaff, 4,6,8
%endmacro
%macro LOAD_BYTES_XMM 1
- movu m0, [%1-4] ; FIXME could be aligned if we changed nnz's allocation
+ movu m2, [%1-4] ; FIXME could be aligned if we changed nnz's allocation
movu m1, [%1+12]
- mova m2, m0
- pslldq m0, 1
+ pslldq m0, m2, 1
shufps m2, m1, q3131 ; cur nnz, all rows
pslldq m1, 1
shufps m0, m1, q3131 ; left neighbors
@@ -2278,7 +2277,7 @@ cglobal deblock_strength, 6,6
RET
%macro DEBLOCK_STRENGTH_XMM 0
-cglobal deblock_strength, 6,6,8
+cglobal deblock_strength, 6,6,7
; Prepare mv comparison register
shl r4d, 8
add r4d, 3 - (1<<8)
@@ -2308,9 +2307,9 @@ cglobal deblock_strength, 6,6,8
mova m2, [mv+4*8*2]
mova m1, [mv+4*8*3]
palignr m3, m2, [mv+4*8*2-16], 12
- palignr m7, m1, [mv+4*8*3-16], 12
psubw m2, m3
- psubw m1, m7
+ palignr m3, m1, [mv+4*8*3-16], 12
+ psubw m1, m3
packsswb m2, m1
%else
movu m0, [mv-4+4*8*0]
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index 970811f..bbe2930 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -1311,9 +1311,9 @@ cglobal coeff_last64, 1,4
shl r0d, 16
or r1d, r2d
or r3d, r0d
- shl r3, 32
- or r1, r3
- not r1
+ shl r3, 32
+ or r1, r3
+ not r1
BSR rax, r1, 0x3f
RET
%endif
@@ -1348,14 +1348,14 @@ cglobal coeff_level_run%1,0,7
pxor m2, m2
LAST_MASK %1, t5d, t0-(%1&1)*SIZEOF_DCTCOEF, t4d
%if %1==15
- shr t5d, 1
+ shr t5d, 1
%elif %1==8
- and t5d, 0xff
+ and t5d, 0xff
%elif %1==4
- and t5d, 0xf
+ and t5d, 0xf
%endif
- xor t5d, (1<<%1)-1
- mov [t1+4], t5d
+ xor t5d, (1<<%1)-1
+ mov [t1+4], t5d
shl t5d, 32-%1
mov t4d, %1-1
LZCOUNT t3d, t5d, 0x1f
More information about the x264-devel mailing list