<div dir="ltr">The parent changeset (909d343d68b3) doesn't exist in the public repo, so I cannot apply this patch. Please rebase and send again.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Dec 8, 2014 at 3:29 PM, <span dir="ltr">&lt;<a href="mailto:aasaipriya@multicorewareinc.com" target="_blank">aasaipriya@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Aasaipriya Chandran &lt;<a href="mailto:aasaipriya@multicorewareinc.com">aasaipriya@multicorewareinc.com</a>&gt;<br>
# Date 1418032695 -19800<br>
# Mon Dec 08 15:28:15 2014 +0530<br>
# Node ID 040a02e1c463ac87d4dd9000bb6ea6604425c45e<br>
# Parent 909d343d68b3b0e34a1239085cf6acc4f2ad2f38<br>
chroma_hpp[8x8] for colorspace i420 in avx2: improve 541c->339c<br>
<br>
diff -r 909d343d68b3 -r 040a02e1c463 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp Mon Dec 01 16:43:24 2014 +0530<br>
+++ b/source/common/x86/asm-primitives.cpp Mon Dec 08 15:28:15 2014 +0530<br>
@@ -1823,6 +1823,8 @@<br>
p.luma_vpp[LUMA_4x4] = x265_interp_8tap_vert_pp_4x4_avx2;<br>
<br>
p.chroma[X265_CSP_I420].filter_hpp[CHROMA_4x4] = x265_interp_4tap_horiz_pp_4x4_avx2;<br>
+ p.chroma[X265_CSP_I420].filter_hpp[CHROMA_8x8] = x265_interp_4tap_horiz_pp_8x8_avx2;<br>
+<br>
}<br>
#endif // if HIGH_BIT_DEPTH<br>
}<br>
diff -r 909d343d68b3 -r 040a02e1c463 source/common/x86/ipfilter8.asm<br>
--- a/source/common/x86/ipfilter8.asm Mon Dec 01 16:43:24 2014 +0530<br>
+++ b/source/common/x86/ipfilter8.asm Mon Dec 08 15:28:15 2014 +0530<br>
@@ -124,6 +124,9 @@<br>
<br>
tab_c_64_n64: times 8 db 64, -64<br>
<br>
+ALIGN 32<br>
+const interp_4tap_8x8_horiz_shuf, dd 0, 4, 1, 5, 2, 6, 3, 7<br>
+<br>
<br>
SECTION .text<br>
<br>
@@ -1248,6 +1251,72 @@<br>
RET<br>
<br>
<br>
+INIT_YMM avx2<br>
+cglobal interp_4tap_horiz_pp_8x8, 4,6,6<br>
+ mov r4d, r4m<br>
+<br>
+%ifdef PIC<br>
+ lea r5, [tab_ChromaCoeff]<br>
+ vpbroadcastd m0, [r5 + r4 * 4]<br>
+%else<br>
+ vpbroadcastd m0, [tab_ChromaCoeff + r4 * 4]<br>
+%endif<br>
+<br>
+ movu m1, [tab_Tm]<br>
+ vpbroadcastd m2, [pw_1]<br>
+<br>
+ ; register map<br>
+ ; m0 - interpolate coeff<br>
+ ; m1 - shuffle order table<br>
+ ; m2 - constant word 1<br>
+<br>
+ sub r0, 1<br>
+ mov r4d, 2<br>
+<br>
+.loop:<br>
+ ; Row 0<br>
+ vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]<br>
+ pshufb m3, m1<br>
+ pmaddubsw m3, m0<br>
+ pmaddwd m3, m2<br>
+<br>
+ ; Row 1<br>
+ vbroadcasti128 m4, [r0 + r1] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]<br>
+ pshufb m4, m1<br>
+ pmaddubsw m4, m0<br>
+ pmaddwd m4, m2<br>
+ packssdw m3, m4<br>
+ pmulhrsw m3, [pw_512]<br>
+ lea r0, [r0 + r1 * 2]<br>
+<br>
+ ; Row 2<br>
+ vbroadcasti128 m4, [r0 ] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]<br>
+ pshufb m4, m1<br>
+ pmaddubsw m4, m0<br>
+ pmaddwd m4, m2<br>
+<br>
+ ; Row 3<br>
+ vbroadcasti128 m5, [r0 + r1] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]<br>
+ pshufb m5, m1<br>
+ pmaddubsw m5, m0<br>
+ pmaddwd m5, m2<br>
+ packssdw m4, m5<br>
+ pmulhrsw m4, [pw_512]<br>
+<br>
+ packuswb m3, m4<br>
+ mova m5, [interp_4tap_8x8_horiz_shuf]<br>
+ vpermd m3, m5, m3<br>
+ vextracti128 xm4, m3, 1<br>
+ movq [r2], xm3<br>
+ movhps [r2 + r3], xm3<br>
+ lea r2, [r2 + r3 * 2]<br>
+ movq [r2], xm4<br>
+ movhps [r2 + r3], xm4<br>
+ lea r2, [r2 + r3 * 2]<br>
+ lea r0, [r0 + r1*2]<br>
+ dec r4d<br>
+ jnz .loop<br>
+ RET<br>
<br>
;--------------------------------------------------------------------------------------------------------------<br>
; void interp_8tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div>