[x265] [PATCH] dequant_normal asm code optimization as per new interface
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Sep 2 16:08:54 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1409637191 -19800
# Node ID 71b094ee56aaa0adb6e25789d289844c0820f062
# Parent 32abebf1dd44d8328a32e7441382e459733233b7
dequant_normal asm code optimization as per new interface
diff -r 32abebf1dd44 -r 71b094ee56aa source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Tue Sep 02 11:21:52 2014 +0530
+++ b/source/common/x86/pixel-util8.asm Tue Sep 02 11:23:11 2014 +0530
@@ -1002,7 +1002,7 @@
;-----------------------------------------------------------------------------
-; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
+; void dequant_normal(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift)
;-----------------------------------------------------------------------------
INIT_XMM sse4
cglobal dequant_normal, 5,5,5
@@ -1027,8 +1027,6 @@
; m2 = word [1]
.loop:
movu m3, [r0]
- movu m4, [r0 + 16]
- packssdw m3, m4 ; m3 = clipQCoef
punpckhwd m4, m3, m2
punpcklwd m3, m2
pmaddwd m3, m1 ; m3 = dword (clipQCoef * scale + add)
@@ -1042,7 +1040,7 @@
movu [r1], m3
movu [r1 + 16], m4
- add r0, 32
+ add r0, 16
add r1, 32
sub r2d, 8
More information about the x265-devel mailing list