<div dir="ltr">Please ignore this patch; it needs modifications for 16x16.<div><br></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Tue, Dec 3, 2013 at 7:08 PM, <span dir="ltr">&lt;<a href="mailto:murugan@multicorewareinc.com" target="_blank">murugan@multicorewareinc.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Murugan Vairavel &lt;<a href="mailto:murugan@multicorewareinc.com">murugan@multicorewareinc.com</a>&gt;<br>
# Date 1386077908 -19800<br>
# Tue Dec 03 19:08:28 2013 +0530<br>
# Node ID 1ae4e8ae04d0792db6590a62272990d83f49a265<br>
# Parent 126f3aefc79dad37e7985953c404ccff370d2729<br>
asm: 10bpp code for transpose 4x4 and 8x8<br>
<br>
diff -r 126f3aefc79d -r 1ae4e8ae04d0 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp Tue Dec 03 18:33:13 2013 +0530<br>
+++ b/source/common/x86/asm-primitives.cpp Tue Dec 03 19:08:28 2013 +0530<br>
@@ -520,6 +520,9 @@<br>
p.sse_ss[LUMA_32x32] = x265_pixel_ssd_ss_32x32_sse2;<br>
p.sse_ss[LUMA_32x64] = x265_pixel_ssd_ss_32x64_sse2;<br>
<br>
+ p.transpose[BLOCK_4x4] = x265_transpose4_sse2;<br>
+ p.transpose[BLOCK_8x8] = x265_transpose8_sse2;<br>
+<br>
p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2;<br>
PIXEL_AVG(sse2);<br>
PIXEL_AVG_W4(mmx2);<br>
diff -r 126f3aefc79d -r 1ae4e8ae04d0 source/common/x86/pixel-util8.asm<br>
--- a/source/common/x86/pixel-util8.asm Tue Dec 03 18:33:13 2013 +0530<br>
+++ b/source/common/x86/pixel-util8.asm Tue Dec 03 19:08:28 2013 +0530<br>
@@ -830,7 +830,20 @@<br>
;-----------------------------------------------------------------<br>
INIT_XMM sse2<br>
cglobal transpose4, 3, 3, 4, dest, src, stride<br>
-<br>
+%if HIGH_BIT_DEPTH<br>
+ add r2, r2<br>
+ movh m0, [r1]<br>
+ movh m1, [r1 + r2]<br>
+ movh m2, [r1 + 2 * r2]<br>
+ lea r1, [r1 + 2 * r2]<br>
+ movh m3, [r1 + r2]<br>
+ punpcklwd m0, m1<br>
+ punpcklwd m2, m3<br>
+ punpckhdq m1, m0, m2<br>
+ punpckldq m0, m2<br>
+ movu [r0], m0<br>
+ movu [r0 + 16], m1<br>
+%else<br>
movd m0, [r1]<br>
movd m1, [r1 + r2]<br>
movd m2, [r1 + 2 * r2]<br>
@@ -841,26 +854,61 @@<br>
punpcklbw m2, m3<br>
punpcklwd m0, m2<br>
movu [r0], m0<br>
-<br>
+%endif<br>
RET<br>
<br>
;-----------------------------------------------------------------<br>
; void transpose_8x8(pixel *dst, pixel *src, intptr_t stride)<br>
;-----------------------------------------------------------------<br>
INIT_XMM sse2<br>
-cglobal transpose8, 3, 3, 8, dest, src, stride<br>
-<br>
+%if HIGH_BIT_DEPTH<br>
+%macro TRANSPOSE_4x4 1<br>
movh m0, [r1]<br>
movh m1, [r1 + r2]<br>
movh m2, [r1 + 2 * r2]<br>
lea r1, [r1 + 2 * r2]<br>
movh m3, [r1 + r2]<br>
- movh m4, [r1 + 2 * r2]<br>
- lea r1, [r1 + 2 * r2]<br>
+ punpcklwd m0, m1<br>
+ punpcklwd m2, m3<br>
+ punpckhdq m1, m0, m2<br>
+ punpckldq m0, m2<br>
+ movlps [r0], m0<br>
+ movhps [r0 + %1], m0<br>
+ movlps [r0 + 2 * %1], m1<br>
+ lea r0, [r0 + 2 * %1]<br>
+ movhps [r0 + %1], m1<br>
+%endmacro<br>
+cglobal transpose8_internal<br>
+ TRANSPOSE_4x4 r5<br>
+ lea r1, [r1 + 2 * r2]<br>
+ lea r0, [r3 + 8]<br>
+ TRANSPOSE_4x4 r5<br>
+ lea r1, [r4 + 8]<br>
+ lea r0, [r3 + 4 * r5]<br>
+ TRANSPOSE_4x4 r5<br>
+ lea r1, [r1 + 2 * r2]<br>
+ lea r0, [r3 + 8 + 4 * r5]<br>
+ TRANSPOSE_4x4 r5<br>
+ ret<br>
+cglobal transpose8, 3, 6, 4, dest, src, stride<br>
+ add r2, r2<br>
+ mov r3, r0<br>
+ mov r4, r1<br>
+ mov r5, 16<br>
+ call transpose8_internal<br>
+%else<br>
+cglobal transpose8, 3, 5, 8, dest, src, stride<br>
+ lea r3, [2 * r2]<br>
+ lea r4, [3 * r2]<br>
+ movh m0, [r1]<br>
+ movh m1, [r1 + r2]<br>
+ movh m2, [r1 + r3]<br>
+ movh m3, [r1 + r4]<br>
+ movh m4, [r1 + 4 * r2]<br>
+ lea r1, [r1 + 4 * r2]<br>
movh m5, [r1 + r2]<br>
- movh m6, [r1 + 2 * r2]<br>
- lea r1, [r1 + 2 * r2]<br>
- movh m7, [r1 + r2]<br>
+ movh m6, [r1 + r3]<br>
+ movh m7, [r1 + r4]<br>
<br>
punpcklbw m0, m1<br>
punpcklbw m2, m3<br>
@@ -880,7 +928,7 @@<br>
movu [r0 + 16], m2<br>
movu [r0 + 32], m1<br>
movu [r0 + 48], m3<br>
-<br>
+%endif<br>
RET<br>
<br>
%macro TRANSPOSE_8x8 1<br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br><div dir="ltr">With Regards,<div><br></div><div>Murugan. V</div><div>+919659287478</div></div>
</div>