[x265] [PATCH 1 of 2] asm: reduce number of movd in dequant_normal

Min Chen chenm003 at 163.com
Sat Sep 6 02:36:27 CEST 2014


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1409963761 25200
# Node ID c4dd39c9ad0b96fbf520f399de41e1e9b4b77c72
# Parent  8abcfdeeea2eab2e11da59002dad42dcf16aeab8
asm: reduce number of movd in dequant_normal

diff -r 8abcfdeeea2e -r c4dd39c9ad0b source/common/dct.cpp
--- a/source/common/dct.cpp	Fri Sep 05 16:48:03 2014 -0700
+++ b/source/common/dct.cpp	Fri Sep 05 17:36:01 2014 -0700
@@ -729,6 +729,7 @@
     X265_CHECK(num <= 32 * 32, "dequant num %d too large\n", num);
     X265_CHECK((num % 8) == 0, "dequant num %d not multiple of 8\n", num);
     X265_CHECK(shift <= 10, "shift too large %d\n", shift);
+    X265_CHECK(((int)coef & 31) == 0, "dequant coef buffer not aligned\n");
 
     int add, coeffQ;
 
diff -r 8abcfdeeea2e -r c4dd39c9ad0b source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Fri Sep 05 16:48:03 2014 -0700
+++ b/source/common/x86/pixel-util8.asm	Fri Sep 05 17:36:01 2014 -0700
@@ -1040,21 +1040,18 @@
 ;-----------------------------------------------------------------------------
 INIT_XMM sse4
 cglobal dequant_normal, 5,5,5
-    movd        m1, r3              ; m1 = word [scale]
     mova        m2, [pw_1]
 %if HIGH_BIT_DEPTH
     cmp         r3d, 32767
     jle         .skip
-    psrld       m1, 2
+    shr         r3d, 2
     sub         r4d, 2
 .skip:
 %endif
     movd        m0, r4d             ; m0 = shift
-    xor         r3d, r3d
-    dec         r4d
+    add         r4d, 15
     bts         r3d, r4d
-    movd        m3, r3d
-    punpcklwd   m1, m3
+    movd        m1, r3d
     pshufd      m1, m1, 0           ; m1 = dword [add scale]
     ; m0 = shift
     ; m1 = scale
@@ -1071,8 +1068,8 @@
     pmovsxwd    m3, m3
     packssdw    m4, m4
     pmovsxwd    m4, m4
-    movu        [r1], m3
-    movu        [r1 + 16], m4
+    mova        [r1], m3
+    mova        [r1 + 16], m4
 
     add         r0, 16
     add         r1, 32



More information about the x265-devel mailing list