[x265] [PATCH] denoiseDct: SSE version of asm code
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Wed Sep 17 13:33:16 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1410953432 -19800
# Node ID e919c3dde6bd9a3b74177e48a14e8b151556caee
# Parent de0b737ed7165b4739128ee430f259ea0f8a5e81
denoiseDct: SSE version of asm code
diff -r de0b737ed716 -r e919c3dde6bd source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Sep 17 16:52:15 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Sep 17 17:00:32 2014 +0530
@@ -1689,6 +1689,7 @@
p.dct[DCT_8x8] = x265_dct8_sse4;
p.copy_shr = x265_copy_shr_sse4;
+ p.denoiseDct = x265_denoise_dct_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r de0b737ed716 -r e919c3dde6bd source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm Wed Sep 17 16:52:15 2014 +0530
+++ b/source/common/x86/dct8.asm Wed Sep 17 17:00:32 2014 +0530
@@ -1054,6 +1054,32 @@
RET
+;-----------------------------------------------------------------------------
+; void denoise_dct(int32_t *dct, uint32_t *sum, uint16_t *offset, int size) -- per coefficient: sum[i] += |dct[i]|; dct[i] = sign(dct[i]) * max(|dct[i]| - offset[i], 0)
+;-----------------------------------------------------------------------------
+INIT_XMM sse4 ; x86inc: the code below is assembled for XMM registers (16 bytes = 4 dwords per vector)
+cglobal denoise_dct, 4, 4, 6 ; x86inc: 4 args in r0..r3 (dct, sum, offset, size per the prototype), 4 GPRs, vector regs m0..m5
+ pxor m5, m5 ; m5 = 0, the constant used by the "> 0" compare in the loop
+ shr r3d, 2 ; r3d = size / 4 = iteration count (4 coefficients per pass; assumes size is a multiple of 4 -- TODO confirm with callers)
+.loop: ; the body runs before any count test, so one vector is processed even if the count is 0
+ mova m0, [r0] ; m0 = 4 signed dct coefficients (mova is the aligned move; presumably dct must be 16-byte aligned -- verify)
+ pabsd m1, m0 ; m1 = |dct|
+ mova m2, [r1] ; m2 = 4 running sums (aligned load, same alignment assumption for sum)
+ paddd m2, m1 ; sum += |dct|
+ mova [r1], m2 ; write the updated sums back
+ movh m2, [r2] ; m2 low half = 4 x uint16 offsets (8 bytes); m2 is free again after the store above
+ pmovzxwd m3, m2 ; zero-extend the 4 words to 4 dwords
+ psubd m1, m3 ; m1 = |dct| - offset
+ pcmpgtd m4, m1, m5 ; m4 = all-ones in lanes where (|dct| - offset) > 0 (3-operand form is expanded by x86inc for SSE)
+ pand m1, m4 ; clamp: lanes that are <= 0 become 0
+ psignd m1, m0 ; reapply the sign of the original coefficient (a lane stays 0 where dct was 0)
+ mova [r0], m1 ; store the denoised coefficients in place
+ add r0, 16 ; advance dct by 4 x int32
+ add r1, 16 ; advance sum by 4 x uint32
+ add r2, 8 ; advance offset by 4 x uint16
+ dec r3d ; one vector done
+ jg .loop ; signed test: continue while the remaining count is > 0
+ RET
INIT_YMM avx2
cglobal denoise_dct, 4,4,4
diff -r de0b737ed716 -r e919c3dde6bd source/common/x86/dct8.h
--- a/source/common/x86/dct8.h Wed Sep 17 16:52:15 2014 +0530
+++ b/source/common/x86/dct8.h Wed Sep 17 17:00:32 2014 +0530
@@ -33,6 +33,7 @@
void x265_dct16_avx2(int16_t *src, int32_t *dst, intptr_t stride);
void x265_dct32_avx2(int16_t *src, int32_t *dst, intptr_t stride);
+void x265_denoise_dct_sse4(int32_t *dct, uint32_t *sum, uint16_t *offset, int size);
void x265_denoise_dct_avx2(int32_t *dct, uint32_t *sum, uint16_t *offset, int size);
#endif // ifndef X265_DCT8_H
More information about the x265-devel
mailing list