[x265] [PATCH] asm-16bpp: code for addAvg luma and chroma all sizes

chen chenm003 at 163.com
Wed Feb 19 18:44:36 CET 2014


At 2014-02-19 18:53:47,dnyaneshwar at multicorewareinc.com wrote:
># HG changeset patch
># User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
># Date 1392807092 -19800
>#      Wed Feb 19 16:21:32 2014 +0530
># Node ID cede20cde62ba0a96ac181bcf78a508097de0e7c
># Parent  6150985c3d535f0ea7a1dc0b8f3c69e65e30d25b
>asm-16bpp: code for addAvg luma and chroma all sizes
>
>+%if HIGH_BIT_DEPTH
>+INIT_XMM sse4
>+cglobal addAvg_2x4, 6,7,8, pSrc0, pSrc1, pDst, iStride0, iStride1, iDstStride
>+    mova          m7,          [pw_16400]
>+    mova          m0,          [pw_1023]
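m7 and m0 are each used only once, so using the constants directly as memory operands (e.g. "paddw m1, [pw_16400]" and "pminsw m1, [pw_1023]") instead of loading them into registers first gives a smaller code size.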
 
>+    add           r3,          r3
>+    add           r4,          r4
>+    add           r5,          r5
>+
>+    movd          m1,          [r0]
>+    movd          m2,          [r0 + r3]
>+    movd          m3,          [r1]
>+    movd          m4,          [r1 + r4]
>+
>+    punpckldq     m1,          m2
>+    punpckldq     m3,          m4
>+
>+    lea           r0,          [r0 + 2 * r3]
>+    lea           r1,          [r1 + 2 * r4]
>+
>+    movd          m2,          [r0]
>+    movd          m4,          [r0 + r3]
>+    movd          m5,          [r1]
>+    movd          m6,          [r1 + r4]
>+
>+    punpckldq     m2,          m4
>+    punpckldq     m5,          m6
>+    punpcklqdq    m1,          m2
>+    punpcklqdq    m3,          m5
>+
>+    paddw         m1,          m3
>+    paddw         m1,          m7
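m7 is 16400, so this addition is very likely to overflow: paddw wraps around rather than saturating, so the sum must stay within the signed 16-bit range for the following psraw to be correct. Please do a dynamic range analysis here.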

>+    psraw         m1,          5
>+    pxor          m6,          m6
>+    pmaxsw        m1,          m6
>+    pminsw        m1,          m0
>+
>+    movd          [r2],        m1
>+    pextrd        [r2 + r5],   m1, 1
>+    lea           r2,          [r2 + 2 * r5]
>+    pextrd        [r2],        m1, 2
>+    pextrd        [r2 + r5],   m1, 3
>+
>+    RET
>+
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140220/d6638491/attachment.html>


More information about the x265-devel mailing list