[x265] [PATCH] dequant_normal asm code optimization as per new interface

Praveen Tiwari (praveen at multicorewareinc.com)
Tue Sep 2 16:08:54 CEST 2014


# HG changeset patch
# User Praveen Tiwari
# Date 1409637191 -19800
# Node ID 71b094ee56aaa0adb6e25789d289844c0820f062
# Parent  32abebf1dd44d8328a32e7441382e459733233b7
dequant_normal: optimize asm code for the new interface (quantCoef is now int16_t, so the second load and the packssdw narrowing step are dropped)

diff -r 32abebf1dd44 -r 71b094ee56aa source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Tue Sep 02 11:21:52 2014 +0530
+++ b/source/common/x86/pixel-util8.asm	Tue Sep 02 11:23:11 2014 +0530
@@ -1002,7 +1002,7 @@
 
 
 ;-----------------------------------------------------------------------------
-; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
+; void dequant_normal(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift)
 ;-----------------------------------------------------------------------------
 INIT_XMM sse4
 cglobal dequant_normal, 5,5,5
@@ -1027,8 +1027,6 @@
     ; m2 = word [1]
 .loop:
     movu        m3, [r0]
-    movu        m4, [r0 + 16]
-    packssdw    m3, m4              ; m3 = clipQCoef
     punpckhwd   m4, m3, m2
     punpcklwd   m3, m2
     pmaddwd     m3, m1              ; m3 = dword (clipQCoef * scale + add)
@@ -1042,7 +1040,7 @@
     movu        [r1], m3
     movu        [r1 + 16], m4
 
-    add         r0, 32
+    add         r0, 16
     add         r1, 32
 
     sub         r2d, 8


More information about the x265-devel mailing list