<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Mon, Feb 17, 2014 at 6:44 AM, <span dir="ltr">&lt;<a href="mailto:nabajit@multicorewareinc.com" target="_blank">nabajit@multicorewareinc.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Nabajit Deka<br>
# Date 1392641037 -19800<br>
# Mon Feb 17 18:13:57 2014 +0530<br>
# Node ID f5275ca8f2985bb0daf563738e6071b81967c2cd<br>
# Parent ce96cdb390fe26aee6effa731e51303c1d9056b0<br>
asm : asm routine for chroma_p2s for 4:4:4 color space format<br></blockquote><div><br></div><div>Queued. There needs to be a comment somewhere about how the chroma_p2s 444 primitive is different from the others.</div><div>
</div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
diff -r ce96cdb390fe -r f5275ca8f298 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/x86/asm-primitives.cpp Mon Feb 17 18:13:57 2014 +0530<br>
@@ -1119,8 +1119,8 @@<br>
<br>
p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3;<br>
p.luma_p2s = x265_luma_p2s_ssse3;<br>
- p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_ssse3;<br>
p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_ssse3;<br>
+ p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_i444_ssse3;<br>
<br>
CHROMA_SP_FILTERS_420(_ssse3);<br>
CHROMA_SP_FILTERS_444(_ssse3);<br>
diff -r ce96cdb390fe -r f5275ca8f298 source/common/x86/ipfilter8.asm<br>
--- a/source/common/x86/ipfilter8.asm Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/x86/ipfilter8.asm Mon Feb 17 18:13:57 2014 +0530<br>
@@ -3680,6 +3680,64 @@<br>
<br>
RET<br>
<br>
+INIT_XMM ssse3<br>
+cglobal chroma_p2s_i444, 3, 7, 4<br>
+<br>
+ ; load width and height<br>
+ mov r3d, r3m<br>
+ mov r4d, r4m<br>
+<br>
+ ; load constant<br>
+ mova m2, [tab_c_128]<br>
+ mova m3, [tab_c_64_n64]<br>
+<br>
+.loopH:<br>
+<br>
+ xor r5d, r5d<br>
+.loopW:<br>
+ lea r6, [r0 + r5]<br>
+<br>
+ movh m0, [r6]<br>
+ punpcklbw m0, m2<br>
+ pmaddubsw m0, m3<br>
+<br>
+ movh m1, [r6 + r1]<br>
+ punpcklbw m1, m2<br>
+ pmaddubsw m1, m3<br>
+<br>
+ add r5d, 8<br>
+ cmp r5d, r3d<br>
+ lea r6, [r2 + r5 * 2]<br>
+ jg .width4<br>
+ movu [r6 + FENC_STRIDE * 0 - 16], m0<br>
+ movu [r6 + FENC_STRIDE * 2 - 16], m1<br>
+ je .nextH<br>
+ jmp .loopW<br>
+<br>
+.width4:<br>
+ test r3d, 4<br>
+ jz .width2<br>
+ test r3d, 2<br>
+ movh [r6 + FENC_STRIDE * 0 - 16], m0<br>
+ movh [r6 + FENC_STRIDE * 2 - 16], m1<br>
+ lea r6, [r6 + 8]<br>
+ pshufd m0, m0, 2<br>
+ pshufd m1, m1, 2<br>
+ jz .nextH<br>
+<br>
+.width2:<br>
+ movd [r6 + FENC_STRIDE * 0 - 16], m0<br>
+ movd [r6 + FENC_STRIDE * 2 - 16], m1<br>
+<br>
+.nextH:<br>
+ lea r0, [r0 + r1 * 2]<br>
+ add r2, FENC_STRIDE * 4<br>
+<br>
+ sub r4d, 2<br>
+ jnz .loopH<br>
+<br>
+ RET<br>
+<br>
%macro PROCESS_CHROMA_SP_W4_4R 0<br>
movq m0, [r0]<br>
movq m1, [r0 + r1]<br>
diff -r ce96cdb390fe -r f5275ca8f298 source/common/x86/ipfilter8.h<br>
--- a/source/common/x86/ipfilter8.h Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/x86/ipfilter8.h Mon Feb 17 18:13:57 2014 +0530<br>
@@ -214,6 +214,7 @@<br>
void x265_interp_8tap_hv_pp_8x8_ssse3(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int idxX, int idxY);<br>
void x265_luma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);<br>
void x265_chroma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);<br>
+void x265_chroma_p2s_i444_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);<br>
void x265_interp_4tap_vert_sp_2x4_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);<br>
void x265_interp_4tap_vert_sp_2x8_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);<br>
void x265_interp_4tap_vert_sp_6x8_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>