[x265] [PATCH] denoise_dct asm code: SSE version

chen chenm003 at 163.com
Thu Sep 18 23:15:12 CEST 2014


Right, just change the hard-coded 16 to mmsize, which is more flexible.

At 2014-09-18 17:41:52,praveen at multicorewareinc.com wrote:
># HG changeset patch
># User Praveen Tiwari
># Date 1411033286 -19800
># Node ID c4b689f6050231e99b9663b7504cd7fff90bdafb
># Parent  54ad38a84a6900a7c674e6d1738fd31271129139
>denoise_dct asm code: SSE version
>
>diff -r 54ad38a84a69 -r c4b689f60502 source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp	Wed Sep 17 16:52:15 2014 +0530
>+++ b/source/common/x86/asm-primitives.cpp	Thu Sep 18 15:11:26 2014 +0530
>@@ -1689,6 +1689,7 @@
> 
>         p.dct[DCT_8x8] = x265_dct8_sse4;
>         p.copy_shr = x265_copy_shr_sse4;
>+        p.denoiseDct = x265_denoise_dct_sse4;
>     }
>     if (cpuMask & X265_CPU_AVX)
>     {
>diff -r 54ad38a84a69 -r c4b689f60502 source/common/x86/dct8.asm
>--- a/source/common/x86/dct8.asm	Wed Sep 17 16:52:15 2014 +0530
>+++ b/source/common/x86/dct8.asm	Thu Sep 18 15:11:26 2014 +0530
>@@ -4,6 +4,7 @@
> ;* Authors: Nabajit Deka <nabajit at multicorewareinc.com>
> ;*          Min Chen <chenm003 at 163.com> <min.chen at multicorewareinc.com>
> ;*          Li Cao <li at multicorewareinc.com>
>+;*          Praveen Kumar Tiwari <Praveen at multicorewareinc.com>
> ;*
> ;* This program is free software; you can redistribute it and/or modify
> ;* it under the terms of the GNU General Public License as published by
>@@ -1054,6 +1055,31 @@
>     RET
> 
> 
>+;-----------------------------------------------------------------------------
>+; void denoise_dct(int32_t *dct, uint32_t *sum, uint16_t *offset, int size)
>+;-----------------------------------------------------------------------------
>+INIT_XMM sse4
>+cglobal denoise_dct, 4, 4, 6
>+    pxor     m5,  m5
>+    shr      r3d, 2
>+.loop:
>+    mova     m0, [r0]
>+    pabsd    m1, m0
>+    mova     m2, [r1]
>+    paddd    m2, m1
>+    mova     [r1], m2
>+    pmovzxwd m3, [r2]
>+    psubd    m1, m3
>+    pcmpgtd  m4, m1, m5
>+    pand     m1, m4
>+    psignd   m1, m0
>+    mova     [r0], m1
>+    add      r0, 16
>+    add      r1, 16
>+    add      r2, 8
>+    dec      r3d
>+    jnz .loop
>+    RET
> 
> INIT_YMM avx2
> cglobal denoise_dct, 4,4,4
>diff -r 54ad38a84a69 -r c4b689f60502 source/common/x86/dct8.h
>--- a/source/common/x86/dct8.h	Wed Sep 17 16:52:15 2014 +0530
>+++ b/source/common/x86/dct8.h	Thu Sep 18 15:11:26 2014 +0530
>@@ -33,6 +33,7 @@
> void x265_dct16_avx2(int16_t *src, int32_t *dst, intptr_t stride);
> void x265_dct32_avx2(int16_t *src, int32_t *dst, intptr_t stride);
> 
>+void x265_denoise_dct_sse4(int32_t *dct, uint32_t *sum, uint16_t *offset, int size);
> void x265_denoise_dct_avx2(int32_t *dct, uint32_t *sum, uint16_t *offset, int size);
> 
> #endif // ifndef X265_DCT8_H
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140919/b308e813/attachment.html>


More information about the x265-devel mailing list