[x265] [PATCH 1 of 2] asm: reduce number of movd in dequant_normal
Min Chen
chenm003 at 163.com
Sat Sep 6 02:36:27 CEST 2014
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1409963761 25200
# Node ID c4dd39c9ad0b96fbf520f399de41e1e9b4b77c72
# Parent 8abcfdeeea2eab2e11da59002dad42dcf16aeab8
asm: reduce number of movd in dequant_normal
diff -r 8abcfdeeea2e -r c4dd39c9ad0b source/common/dct.cpp
--- a/source/common/dct.cpp Fri Sep 05 16:48:03 2014 -0700
+++ b/source/common/dct.cpp Fri Sep 05 17:36:01 2014 -0700
@@ -729,6 +729,7 @@
X265_CHECK(num <= 32 * 32, "dequant num %d too large\n", num);
X265_CHECK((num % 8) == 0, "dequant num %d not multiple of 8\n", num);
X265_CHECK(shift <= 10, "shift too large %d\n", shift);
+ X265_CHECK(((int)coef & 31) == 0, "dequant coef buffer not aligned\n");
int add, coeffQ;
diff -r 8abcfdeeea2e -r c4dd39c9ad0b source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Fri Sep 05 16:48:03 2014 -0700
+++ b/source/common/x86/pixel-util8.asm Fri Sep 05 17:36:01 2014 -0700
@@ -1040,21 +1040,18 @@
;-----------------------------------------------------------------------------
INIT_XMM sse4
cglobal dequant_normal, 5,5,5
- movd m1, r3 ; m1 = word [scale]
mova m2, [pw_1]
%if HIGH_BIT_DEPTH
cmp r3d, 32767
jle .skip
- psrld m1, 2
+ shr r3d, 2
sub r4d, 2
.skip:
%endif
movd m0, r4d ; m0 = shift
- xor r3d, r3d
- dec r4d
+ add r4d, 15
bts r3d, r4d
- movd m3, r3d
- punpcklwd m1, m3
+ movd m1, r3d
pshufd m1, m1, 0 ; m1 = dword [add scale]
; m0 = shift
; m1 = scale
@@ -1071,8 +1068,8 @@
pmovsxwd m3, m3
packssdw m4, m4
pmovsxwd m4, m4
- movu [r1], m3
- movu [r1 + 16], m4
+ mova [r1], m3
+ mova [r1 + 16], m4
add r0, 16
add r1, 32
More information about the x265-devel mailing list