[x265] [PATCH] asm: optimize dct4
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Tue Aug 26 11:52:40 CEST 2014
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1409046621 -19800
# Tue Aug 26 15:20:21 2014 +0530
# Node ID bbd5b3f269b095760d21877e94d67df8bd72f479
# Parent 5acfb12ec5d17cc700e313fc99248e2408e5967b
asm: optimize dct4
diff -r 5acfb12ec5d1 -r bbd5b3f269b0 source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm Mon Aug 25 17:53:12 2014 +0900
+++ b/source/common/x86/dct8.asm Tue Aug 26 15:20:21 2014 +0530
@@ -30,6 +30,8 @@
SECTION_RODATA 32
+dct4_shuf: db 0, 1, 2, 3, 8, 9, 10, 11, 6, 7, 4, 5, 14, 15, 12, 13
+
tab_dct4: times 4 dw 64, 64
times 4 dw 83, 36
times 4 dw 64, -64
@@ -118,16 +120,14 @@
movh m0, [r0 + 0 * r2]
movh m1, [r0 + 1 * r2]
punpcklqdq m0, m1
- pshufd m0, m0, 0xD8
- pshufhw m0, m0, 0xB1
+ pshufb m0, [dct4_shuf]
lea r0, [r0 + 2 * r2]
movh m1, [r0]
movh m2, [r0 + r2]
punpcklqdq m1, m2
- pshufd m1, m1, 0xD8
- pshufhw m1, m1, 0xB1
+ pshufb m1, [dct4_shuf]
punpcklqdq m2, m0, m1
punpckhqdq m0, m1
@@ -140,8 +140,7 @@
paddd m3, m7
psrad m3, DCT_SHIFT
packssdw m0, m3
- pshufd m0, m0, 0xD8
- pshufhw m0, m0, 0xB1
+ pshufb m0, [dct4_shuf]
pmaddwd m1, m6
paddd m1, m7
psrad m1, DCT_SHIFT
@@ -149,9 +148,8 @@
paddd m2, m7
psrad m2, DCT_SHIFT
packssdw m1, m2
- pshufd m1, m1, 0xD8
- pshufhw m1, m1, 0xB1
+ pshufb m1, [dct4_shuf]
punpcklqdq m2, m0, m1
punpckhqdq m0, m1
More information about the x265-devel mailing list