[x265] [PATCH] asm: 16bpp support for quant and dequant_normal

chen chenm003 at 163.com
Fri Feb 14 14:47:05 CET 2014


>@@ -1154,14 +1154,18 @@
> ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
> ;-----------------------------------------------------------------------------
> INIT_XMM sse4
>-cglobal dequant_normal, 2,5,8
>-    movd        m1, r3m             ; m1 = word [scale]
>+cglobal dequant_normal, 4,6,5
>+    movd        m1, r3             ; m1 = word [scale]
>+    cmp         r3d, 255
>+    jle         .skip
>+    psrld       m1, 2
>+.skip:
>     mov         r4d, r4m
>     movd        m0, r4d             ; m0 = shift
>-    xor         r3d, r3d
>+    xor         r5d, r5d
>     dec         r4d
>-    bts         r3d, r4d
>-    movd        m2, r3d
>+    bts         r5d, r4d
>+    movd        m2, r5d
>     punpcklwd   m1, m2
>     pshufd      m1, m1, 0           ; m1 = dword [add scale]
>     mova        m2, [pw_1]
>@@ -1174,6 +1178,10 @@
>     movu        m3, [r0]
>     movu        m4, [r0 + 16]
>     packssdw    m3, m4              ; m3 = clipQCoef
>+    cmp         r3d, 255
>+    jle         .skip1
>+    psllw       m3, 2
>+.skip1:
>     punpckhwd   m4, m3, m2
>     punpcklwd   m3, m2
>     pmaddwd     m3, m1              ; m3 = dword (clipQCoef * scale + add)

 
Don't put a jump inside the inner loop; it hurts performance.
Instead, write two separate code blocks (one for each case) and branch just once at the head of the function.
 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140214/91856ee3/attachment-0001.html>


More information about the x265-devel mailing list