[x265] [PATCH] asm : remove chroma_p2s_i444, can be replaced by luma_p2s

Nabajit Deka nabajit at multicorewareinc.com
Mon Mar 3 13:15:04 CET 2014


# HG changeset patch
# User Nabajit Deka
# Date 1393848896 -19800
#      Mon Mar 03 17:44:56 2014 +0530
# Node ID 5e0879e805a24c1c376eee1dbc160f597b7909cd
# Parent  5e6e06b8ec118904ad28a2d703dc9ad7956b4d44
asm : remove chroma_p2s_i444, can be replaced by luma_p2s

diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Mar 03 17:27:42 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp	Mon Mar 03 17:44:56 2014 +0530
@@ -1231,7 +1231,7 @@
         p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3;
         p.luma_p2s = x265_luma_p2s_ssse3;
         p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_ssse3;
-        p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_i444_ssse3; // full width dststride
+        p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_ssse3; // for I444, chroma_p2s can be replaced by luma_p2s
 
         p.dct[DST_4x4] = x265_dst4_ssse3;
     }
diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Mon Mar 03 17:27:42 2014 +0530
+++ b/source/common/x86/ipfilter8.asm	Mon Mar 03 17:44:56 2014 +0530
@@ -3680,64 +3680,6 @@
 
     RET
 
-INIT_XMM ssse3
-cglobal chroma_p2s_i444, 3, 7, 4
-
-    ; load width and height
-    mov         r3d, r3m
-    mov         r4d, r4m
-
-    ; load constant
-    mova        m2, [tab_c_128]
-    mova        m3, [tab_c_64_n64]
-
-.loopH:
-
-    xor         r5d, r5d
-.loopW:
-    lea         r6, [r0 + r5]
-
-    movh        m0, [r6]
-    punpcklbw   m0, m2
-    pmaddubsw   m0, m3
-
-    movh        m1, [r6 + r1]
-    punpcklbw   m1, m2
-    pmaddubsw   m1, m3
-
-    add         r5d, 8
-    cmp         r5d, r3d
-    lea         r6, [r2 + r5 * 2]
-    jg          .width4
-    movu        [r6 + FENC_STRIDE * 0 - 16], m0
-    movu        [r6 + FENC_STRIDE * 2 - 16], m1
-    je          .nextH
-    jmp         .loopW
-
-.width4:
-    test        r3d, 4
-    jz          .width2
-    test        r3d, 2
-    movh        [r6 + FENC_STRIDE * 0 - 16], m0
-    movh        [r6 + FENC_STRIDE * 2 - 16], m1
-    lea         r6, [r6 + 8]
-    pshufd      m0, m0, 2
-    pshufd      m1, m1, 2
-    jz          .nextH
-
-.width2:
-    movd        [r6 + FENC_STRIDE * 0 - 16], m0
-    movd        [r6 + FENC_STRIDE * 2 - 16], m1
-
-.nextH:
-    lea         r0, [r0 + r1 * 2]
-    add         r2, FENC_STRIDE * 4
-
-    sub         r4d, 2
-    jnz         .loopH
-
-    RET
-
 %macro PROCESS_CHROMA_SP_W4_4R 0
     movq       m0, [r0]
     movq       m1, [r0 + r1]
diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h	Mon Mar 03 17:27:42 2014 +0530
+++ b/source/common/x86/ipfilter8.h	Mon Mar 03 17:44:56 2014 +0530
@@ -300,7 +300,6 @@
 CHROMA_SS_FILTERS_SSE4(_sse4);
 
 void x265_chroma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
-void x265_chroma_p2s_i444_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
 
 #undef SETUP_CHROMA_FUNC_DEF
 #undef SETUP_CHROMA_SP_FUNC_DEF


More information about the x265-devel mailing list