[x265] [PATCH] denoise_dct asm code: SSE version
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Thu Sep 18 11:41:52 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1411033286 -19800
# Node ID c4b689f6050231e99b9663b7504cd7fff90bdafb
# Parent 54ad38a84a6900a7c674e6d1738fd31271129139
denoise_dct asm code: SSE version
diff -r 54ad38a84a69 -r c4b689f60502 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Sep 17 16:52:15 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Sep 18 15:11:26 2014 +0530
@@ -1689,6 +1689,7 @@
p.dct[DCT_8x8] = x265_dct8_sse4;
p.copy_shr = x265_copy_shr_sse4;
+ p.denoiseDct = x265_denoise_dct_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r 54ad38a84a69 -r c4b689f60502 source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm Wed Sep 17 16:52:15 2014 +0530
+++ b/source/common/x86/dct8.asm Thu Sep 18 15:11:26 2014 +0530
@@ -4,6 +4,7 @@
;* Authors: Nabajit Deka <nabajit at multicorewareinc.com>
;* Min Chen <chenm003 at 163.com> <min.chen at multicorewareinc.com>
;* Li Cao <li at multicorewareinc.com>
+;* Praveen Kumar Tiwari <Praveen at multicorewareinc.com>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
@@ -1054,6 +1055,31 @@
RET
+;-----------------------------------------------------------------------------
+; void denoise_dct(int32_t *dct, uint32_t *sum, uint16_t *offset, int size)
+;-----------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal denoise_dct, 4, 4, 5
+; r0 = dct, r1 = sum, r2 = offset, r3d = size (see the C prototype above)
+    shr         r3d, 2              ; iteration count: four 32-bit coefficients per pass
+    pxor        m4, m4              ; m4 = 0, lower clamp for the adjusted magnitudes
+.loop:
+    mova        m0, [r0]            ; m0 = four signed coefficients
+    pabsd       m1, m0              ; m1 = |coefficient|
+    mova        m2, [r1]
+    paddd       m2, m1
+    mova        [r1], m2            ; sum[i] += |coefficient|
+    pmovzxwd    m3, [r2]            ; m3 = four 16-bit offsets zero-extended to dwords
+    psubd       m1, m3              ; m1 = |coefficient| - offset
+    pmaxsd      m1, m4              ; results that are not positive become zero
+    psignd      m1, m0              ; reapply the sign of the original coefficient
+    mova        [r0], m1            ; write the denoised coefficients back
+    add         r2, 8               ; advance offset by 4 * 2 bytes
+    add         r1, 16              ; advance sum by 4 * 4 bytes
+    add         r0, 16              ; advance dct by 4 * 4 bytes
+    sub         r3d, 1
+    jnz .loop
+    RET
INIT_YMM avx2
cglobal denoise_dct, 4,4,4
diff -r 54ad38a84a69 -r c4b689f60502 source/common/x86/dct8.h
--- a/source/common/x86/dct8.h Wed Sep 17 16:52:15 2014 +0530
+++ b/source/common/x86/dct8.h Thu Sep 18 15:11:26 2014 +0530
@@ -33,6 +33,7 @@
void x265_dct16_avx2(int16_t *src, int32_t *dst, intptr_t stride);
void x265_dct32_avx2(int16_t *src, int32_t *dst, intptr_t stride);
+void x265_denoise_dct_sse4(int32_t *dct, uint32_t *sum, uint16_t *offset, int size);
void x265_denoise_dct_avx2(int32_t *dct, uint32_t *sum, uint16_t *offset, int size);
#endif // ifndef X265_DCT8_H
More information about the x265-devel
mailing list