[x265] [PATCH] asm: 16bpp support for quant and dequant_normal

Murugan Vairavel murugan at multicorewareinc.com
Fri Feb 14 14:33:55 CET 2014


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1392384257 -19800
#      Fri Feb 14 18:54:17 2014 +0530
# Node ID 75e6e510877b685611c7083eca5433283c49f5a3
# Parent  423c7ff885b34a88628d32e4c26532f7664a93f7
asm: 16bpp support for quant and dequant_normal

diff -r 423c7ff885b3 -r 75e6e510877b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri Feb 14 18:52:13 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Feb 14 18:54:17 2014 +0530
@@ -743,6 +743,9 @@
     }
     if (cpuMask & X265_CPU_SSE4)
     {
+
+        p.quant = x265_quant_sse4;
+        p.dequant_normal = x265_dequant_normal_sse4;
         p.cvt16to32_shl = x265_cvt16to32_shl_sse4;
 
         p.intra_pred[BLOCK_4x4][0] = x265_intra_pred_planar4_sse4;
diff -r 423c7ff885b3 -r 75e6e510877b source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Fri Feb 14 18:52:13 2014 +0530
+++ b/source/common/x86/pixel-util8.asm	Fri Feb 14 18:54:17 2014 +0530
@@ -1071,18 +1071,18 @@
     mova        m4, [c_d_4]     ; m4 = [4 4 4 4]
 .loop:
     ; 4 coeff
-    movu        m0, [r0]        ; m1 = level
+    movu        m0, [r0]        ; m0 = level
     pxor        m1, m1
-    pcmpgtd     m1, m0          ; m2 = sign
-    movu        m2, [r1]        ; m3 = qcoeff
+    pcmpgtd     m1, m0          ; m1 = sign
+    movu        m2, [r1]        ; m2 = qcoeff
     pabsd       m0, m0
-    pmulld      m0, m2          ; m1 = tmpLevel1
+    pmulld      m0, m2          ; m0 = tmpLevel1
     paddd       m2, m0, addVec
-    psrad       m2, qbits       ; m3 = level1
+    psrad       m2, qbits       ; m2 = level1
     paddd       m7, m2
     pslld       m3, m2, qbits
     psubd       m0, m3
-    psrad       m0, qbits8      ; m1 = deltaU1
+    psrad       m0, qbits8      ; m0 = deltaU1
     movu        [r2], m0
 
     pxor        m0, m0
@@ -1099,18 +1099,18 @@
     movu        [r3], m2
 
     ; 4 coeff
-    movu        m0, [r0 + 16]   ; m1 = level
+    movu        m0, [r0 + 16]   ; m0 = level
     pxor        m1, m1
-    pcmpgtd     m1, m0          ; m2 = sign
-    movu        m2, [r1 + 16]   ; m3 = qcoeff
+    pcmpgtd     m1, m0          ; m1 = sign
+    movu        m2, [r1 + 16]   ; m2 = qcoeff
     pabsd       m0, m0
-    pmulld      m0, m2          ; m1 = tmpLevel1
+    pmulld      m0, m2          ; m0 = tmpLevel1
     paddd       m2, m0, addVec
-    psrad       m2, qbits       ; m3 = level1
+    psrad       m2, qbits       ; m2 = level1
     paddd       m7, m2
     pslld       m3, m2, qbits
     psubd       m0, m3
-    psrad       m0, qbits8      ; m1 = deltaU1
+    psrad       m0, qbits8      ; m0 = deltaU1
     movu        [r2 + 16], m0
 
     pxor        m0, m0
@@ -1154,14 +1154,18 @@
 ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
 ;-----------------------------------------------------------------------------
 INIT_XMM sse4
-cglobal dequant_normal, 2,5,8
-    movd        m1, r3m             ; m1 = word [scale]
+cglobal dequant_normal, 4,6,5
+    movd        m1, r3             ; m1 = word [scale]
+    cmp         r3d, 255
+    jle         .skip
+    psrld       m1, 2
+.skip:
     mov         r4d, r4m
     movd        m0, r4d             ; m0 = shift
-    xor         r3d, r3d
+    xor         r5d, r5d
     dec         r4d
-    bts         r3d, r4d
-    movd        m2, r3d
+    bts         r5d, r4d
+    movd        m2, r5d
     punpcklwd   m1, m2
     pshufd      m1, m1, 0           ; m1 = dword [add scale]
     mova        m2, [pw_1]
@@ -1174,6 +1178,10 @@
     movu        m3, [r0]
     movu        m4, [r0 + 16]
     packssdw    m3, m4              ; m3 = clipQCoef
+    cmp         r3d, 255
+    jle         .skip1
+    psllw       m3, 2
+.skip1:
     punpckhwd   m4, m3, m2
     punpcklwd   m3, m2
     pmaddwd     m3, m1              ; m3 = dword (clipQCoef * scale + add)


More information about the x265-devel mailing list