[x265] [PATCH] denoiseDct asm code: remove faulty code, need a new SSE version

praveen at multicorewareinc.com praveen at multicorewareinc.com
Wed Sep 17 13:32:26 CEST 2014


# HG changeset patch
# User Praveen Tiwari
# Date 1410952504 -19800
# Node ID 530c1824c585870c07ba13623cb92b21637a8514
# Parent  a2dcc12bd36f41a99c346870cc4c23c1e313665b
denoiseDct asm code: remove faulty code, need a new SSE version

diff -r a2dcc12bd36f -r 530c1824c585 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Sep 17 16:33:52 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Sep 17 16:45:04 2014 +0530
@@ -1565,7 +1565,6 @@
         p.idct[IDCT_4x4] = x265_idct4_sse2;
         p.idct[IDST_4x4] = x265_idst4_sse2;
         p.planecopy_sp = x265_downShift_16_sse2;
-        //p.denoiseDct = x265_denoise_dct_sse2;
         p.copy_shl[BLOCK_4x4] = x265_copy_shl_4_sse2;
         p.copy_shl[BLOCK_8x8] = x265_copy_shl_8_sse2;
         p.copy_shl[BLOCK_16x16] = x265_copy_shl_16_sse2;
@@ -1605,7 +1604,6 @@
         p.dct[DST_4x4] = x265_dst4_ssse3;
         p.idct[IDCT_8x8] = x265_idct8_ssse3;
         p.count_nonzero = x265_count_nonzero_ssse3;
-        //p.denoiseDct = x265_denoise_dct_ssse3;
     }
     if (cpuMask & X265_CPU_SSE4)
     {
@@ -1709,7 +1707,6 @@
 
         p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_avx;
         p.ssim_end_4 = x265_pixel_ssim_end4_avx;
-        //p.denoiseDct = x265_denoise_dct_avx;
     }
     if (cpuMask & X265_CPU_XOP)
     {
diff -r a2dcc12bd36f -r 530c1824c585 source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm	Wed Sep 17 16:33:52 2014 +0530
+++ b/source/common/x86/dct8.asm	Wed Sep 17 16:45:04 2014 +0530
@@ -1054,102 +1054,6 @@
     RET
 
 
-; TODO: split into two version after coeff_t changed
-%if 1 ;HIGH_BIT_DEPTH
-;-----------------------------------------------------------------------------
-; void denoise_dct( int32_t *dct, uint32_t *sum, uint32_t *offset, int size )
-;-----------------------------------------------------------------------------
-%macro DENOISE_DCT 0
-cglobal denoise_dct, 4,4,6
-    pxor      m5, m5
-    movsxdifnidn r3, r3d
-.loop:
-    mova      m2, [r0+r3*4-2*mmsize]
-    mova      m3, [r0+r3*4-1*mmsize]
-    ABSD      m0, m2
-    ABSD      m1, m3
-    paddd     m4, m0, [r1+r3*4-2*mmsize]
-    psubd     m0, [r2+r3*4-2*mmsize]
-    mova      [r1+r3*4-2*mmsize], m4
-    paddd     m4, m1, [r1+r3*4-1*mmsize]
-    psubd     m1, [r2+r3*4-1*mmsize]
-    mova      [r1+r3*4-1*mmsize], m4
-    pcmpgtd   m4, m0, m5
-    pand      m0, m4
-    pcmpgtd   m4, m1, m5
-    pand      m1, m4
-    PSIGND    m0, m2
-    PSIGND    m1, m3
-    mova      [r0+r3*4-2*mmsize], m0
-    mova      [r0+r3*4-1*mmsize], m1
-    sub      r3d, mmsize/2
-    jg .loop
-    RET
-%endmacro
-
-%if ARCH_X86_64 == 0
-INIT_MMX mmx
-DENOISE_DCT
-%endif
-INIT_XMM sse2
-DENOISE_DCT
-INIT_XMM ssse3
-DENOISE_DCT
-INIT_XMM avx
-DENOISE_DCT
-INIT_YMM avx2
-DENOISE_DCT
-
-%else ; !HIGH_BIT_DEPTH
-
-;-----------------------------------------------------------------------------
-; void denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
-;-----------------------------------------------------------------------------
-%macro DENOISE_DCT 0
-cglobal denoise_dct, 4,4,7
-    pxor      m6, m6
-    movsxdifnidn r3, r3d
-.loop:
-    mova      m2, [r0+r3*2-2*mmsize]
-    mova      m3, [r0+r3*2-1*mmsize]
-    ABSW      m0, m2, sign
-    ABSW      m1, m3, sign
-    psubusw   m4, m0, [r2+r3*2-2*mmsize]
-    psubusw   m5, m1, [r2+r3*2-1*mmsize]
-    PSIGNW    m4, m2
-    PSIGNW    m5, m3
-    mova      [r0+r3*2-2*mmsize], m4
-    mova      [r0+r3*2-1*mmsize], m5
-    punpcklwd m2, m0, m6
-    punpcklwd m3, m1, m6
-    punpckhwd m0, m6
-    punpckhwd m1, m6
-    paddd     m2, [r1+r3*4-4*mmsize]
-    paddd     m0, [r1+r3*4-3*mmsize]
-    paddd     m3, [r1+r3*4-2*mmsize]
-    paddd     m1, [r1+r3*4-1*mmsize]
-    mova      [r1+r3*4-4*mmsize], m2
-    mova      [r1+r3*4-3*mmsize], m0
-    mova      [r1+r3*4-2*mmsize], m3
-    mova      [r1+r3*4-1*mmsize], m1
-    sub       r3, mmsize
-    jg .loop
-%if (mmsize == 8)
-    EMMS
-%endif
-    RET
-%endmacro
-
-%if ARCH_X86_64 == 0
-INIT_MMX mmx
-DENOISE_DCT
-%endif
-INIT_XMM sse2
-DENOISE_DCT
-INIT_XMM ssse3
-DENOISE_DCT
-INIT_XMM avx
-DENOISE_DCT
 
 INIT_YMM avx2
 cglobal denoise_dct, 4,4,4
@@ -1172,7 +1076,6 @@
     jg .loop
     RET
 
-%endif ; !HIGH_BIT_DEPTH
 
 %macro DCT16_PASS_1_E 2
     vpbroadcastq    m7,                [r7 + %1]


More information about the x265-devel mailing list