[x265] [PATCH] asm : remove chroma_p2s_i444, can be replaced by luma_p2s
nabajit at multicorewareinc.com
nabajit at multicorewareinc.com
Mon Mar 3 13:15:04 CET 2014
# HG changeset patch
# User Nabajit Deka
# Date 1393848896 -19800
# Mon Mar 03 17:44:56 2014 +0530
# Node ID 5e0879e805a24c1c376eee1dbc160f597b7909cd
# Parent 5e6e06b8ec118904ad28a2d703dc9ad7956b4d44
asm : remove chroma_p2s_i444, can be replaced by luma_p2s
diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Mar 03 17:27:42 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Mar 03 17:44:56 2014 +0530
@@ -1231,7 +1231,7 @@
p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3;
p.luma_p2s = x265_luma_p2s_ssse3;
p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_ssse3;
- p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_i444_ssse3; // full width dststride
+ p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_ssse3; // for i444 , chroma_p2s can be replaced by luma_p2s
p.dct[DST_4x4] = x265_dst4_ssse3;
}
diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Mon Mar 03 17:27:42 2014 +0530
+++ b/source/common/x86/ipfilter8.asm Mon Mar 03 17:44:56 2014 +0530
@@ -3680,64 +3680,6 @@
RET
-INIT_XMM ssse3
-cglobal chroma_p2s_i444, 3, 7, 4
-
- ; load width and height
- mov r3d, r3m
- mov r4d, r4m
-
- ; load constant
- mova m2, [tab_c_128]
- mova m3, [tab_c_64_n64]
-
-.loopH:
-
- xor r5d, r5d
-.loopW:
- lea r6, [r0 + r5]
-
- movh m0, [r6]
- punpcklbw m0, m2
- pmaddubsw m0, m3
-
- movh m1, [r6 + r1]
- punpcklbw m1, m2
- pmaddubsw m1, m3
-
- add r5d, 8
- cmp r5d, r3d
- lea r6, [r2 + r5 * 2]
- jg .width4
- movu [r6 + FENC_STRIDE * 0 - 16], m0
- movu [r6 + FENC_STRIDE * 2 - 16], m1
- je .nextH
- jmp .loopW
-
-.width4:
- test r3d, 4
- jz .width2
- test r3d, 2
- movh [r6 + FENC_STRIDE * 0 - 16], m0
- movh [r6 + FENC_STRIDE * 2 - 16], m1
- lea r6, [r6 + 8]
- pshufd m0, m0, 2
- pshufd m1, m1, 2
- jz .nextH
-
-.width2:
- movd [r6 + FENC_STRIDE * 0 - 16], m0
- movd [r6 + FENC_STRIDE * 2 - 16], m1
-
-.nextH:
- lea r0, [r0 + r1 * 2]
- add r2, FENC_STRIDE * 4
-
- sub r4d, 2
- jnz .loopH
-
- RET
-
%macro PROCESS_CHROMA_SP_W4_4R 0
movq m0, [r0]
movq m1, [r0 + r1]
diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h Mon Mar 03 17:27:42 2014 +0530
+++ b/source/common/x86/ipfilter8.h Mon Mar 03 17:44:56 2014 +0530
@@ -300,7 +300,6 @@
CHROMA_SS_FILTERS_SSE4(_sse4);
void x265_chroma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
-void x265_chroma_p2s_i444_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
#undef SETUP_CHROMA_FUNC_DEF
#undef SETUP_CHROMA_SP_FUNC_DEF
More information about the x265-devel mailing list