<div dir="ltr">The parent changeset of this patch (909d343d68b3) doesn't exist in the public repo, so I cannot apply it. Please rebase the patch and send it again.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Dec 8, 2014 at 3:29 PM,  <span dir="ltr">&lt;<a href="mailto:aasaipriya@multicorewareinc.com" target="_blank">aasaipriya@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Aasaipriya Chandran &lt;<a href="mailto:aasaipriya@multicorewareinc.com">aasaipriya@multicorewareinc.com</a>&gt;<br>
# Date 1418032695 -19800<br>
#      Mon Dec 08 15:28:15 2014 +0530<br>
# Node ID 040a02e1c463ac87d4dd9000bb6ea6604425c45e<br>
# Parent  909d343d68b3b0e34a1239085cf6acc4f2ad2f38<br>
chroma_hpp[8x8] for colorspace i420 in avx2: improve 541c->339c<br>
<br>
diff -r 909d343d68b3 -r 040a02e1c463 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp      Mon Dec 01 16:43:24 2014 +0530<br>
+++ b/source/common/x86/asm-primitives.cpp      Mon Dec 08 15:28:15 2014 +0530<br>
@@ -1823,6 +1823,8 @@<br>
         p.luma_vpp[LUMA_4x4] = x265_interp_8tap_vert_pp_4x4_avx2;<br>
<br>
         p.chroma[X265_CSP_I420].filter_hpp[CHROMA_4x4] = x265_interp_4tap_horiz_pp_4x4_avx2;<br>
+        p.chroma[X265_CSP_I420].filter_hpp[CHROMA_8x8] = x265_interp_4tap_horiz_pp_8x8_avx2;<br>
+<br>
     }<br>
 #endif // if HIGH_BIT_DEPTH<br>
 }<br>
diff -r 909d343d68b3 -r 040a02e1c463 source/common/x86/ipfilter8.asm<br>
--- a/source/common/x86/ipfilter8.asm   Mon Dec 01 16:43:24 2014 +0530<br>
+++ b/source/common/x86/ipfilter8.asm   Mon Dec 08 15:28:15 2014 +0530<br>
@@ -124,6 +124,9 @@<br>
<br>
 tab_c_64_n64:   times 8 db 64, -64<br>
<br>
+ALIGN 32<br>
+const interp_4tap_8x8_horiz_shuf,   dd 0, 4, 1, 5, 2, 6, 3, 7<br>
+<br>
<br>
 SECTION .text<br>
<br>
@@ -1248,6 +1251,72 @@<br>
     RET<br>
<br>
<br>
+INIT_YMM avx2<br>
+cglobal interp_4tap_horiz_pp_8x8, 4,6,6<br>
+    mov               r4d,    r4m<br>
+<br>
+%ifdef PIC<br>
+    lea               r5,           [tab_ChromaCoeff]<br>
+    vpbroadcastd      m0,           [r5 + r4 * 4]<br>
+%else<br>
+    vpbroadcastd      m0,           [tab_ChromaCoeff + r4 * 4]<br>
+%endif<br>
+<br>
+    movu              m1,           [tab_Tm]<br>
+    vpbroadcastd      m2,           [pw_1]<br>
+<br>
+    ; register map (m3-m5 are scratch; r4d is reused below as the loop counter)<br>
+    ; m0 - interpolate coeff: dword at tab_ChromaCoeff + r4 * 4, broadcast to every dword lane<br>
+    ; m1 - shuffle order table (tab_Tm), used as the pshufb control for every row<br>
+    ; m2 - constant word 1 (pw_1), pmaddwd multiplier that adds adjacent word pairs<br>
+<br>
+    sub               r0,           1<br>
+    mov               r4d,          2<br>
+<br>
+.loop:<br>
+    ; Row 0 (each loop pass filters 4 rows; 2 passes cover all 8 rows)<br>
+    vbroadcasti128    m3,           [r0]                        ; [x x x x x A 9 8 7 6 5 4 3 2 1 0], same 16 bytes in both lanes<br>
+    pshufb            m3,           m1<br>
+    pmaddubsw         m3,           m0<br>
+    pmaddwd           m3,           m2<br>
+<br>
+    ; Row 1 (packed to words together with row 0, then scaled by pmulhrsw with pw_512)<br>
+    vbroadcasti128    m4,           [r0 + r1]                        ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]<br>
+    pshufb            m4,           m1<br>
+    pmaddubsw         m4,           m0<br>
+    pmaddwd           m4,           m2<br>
+    packssdw          m3,           m4<br>
+    pmulhrsw          m3,           [pw_512]<br>
+    lea               r0,           [r0 + r1 * 2]<br>
+<br>
+    ; Row 2 (r0 was advanced by two source rows just above)<br>
+    vbroadcasti128    m4,           [r0 ]                        ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]<br>
+    pshufb            m4,           m1<br>
+    pmaddubsw         m4,           m0<br>
+    pmaddwd           m4,           m2<br>
+<br>
+    ; Row 3 (packed with row 2 and scaled the same way as rows 0 and 1)<br>
+    vbroadcasti128    m5,           [r0 + r1]                        ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]<br>
+    pshufb            m5,           m1<br>
+    pmaddubsw         m5,           m0<br>
+    pmaddwd           m5,           m2<br>
+    packssdw          m4,           m5<br>
+    pmulhrsw          m4,           [pw_512]<br>
+<br>
+    packuswb          m3,           m4<br>
+    mova              m5,           [interp_4tap_8x8_horiz_shuf]<br>
+    vpermd            m3,           m5,     m3<br>
+    vextracti128      xm4,          m3,     1<br>
+    movq              [r2],         xm3<br>
+    movhps            [r2 + r3],    xm3<br>
+    lea               r2,           [r2 + r3 * 2]<br>
+    movq              [r2],         xm4<br>
+    movhps            [r2 + r3],    xm4<br>
+    lea               r2,           [r2 + r3 * 2]<br>
+    lea               r0,           [r0 + r1*2]<br>
+    dec               r4d<br>
+    jnz               .loop<br>
+    RET<br>
<br>
 ;--------------------------------------------------------------------------------------------------------------<br>
 ; void interp_8tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div>