<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Jul 6, 2018 at 2:48 PM, <span dir="ltr">&lt;<a href="mailto:vignesh@multicorewareinc.com" target="_blank">vignesh@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Jayashree &lt;<a href="mailto:jayashree.c@multicorewareinc.com">jayashree.c@multicorewareinc.<wbr>com</a>&gt;<br>
# Date 1524473214 -19800<br>
# Mon Apr 23 14:16:54 2018 +0530<br>
# Node ID 0106f9f2f867ee20893a317e98c60e<wbr>9ca626e7d2<br>
# Parent 52ec2b63b870a1ec0c4ce45abc89ac<wbr>522ade3c1f<br>
x86 : AVX2 Refactor luma_hps 8xN and 24x32<br>
<br>
diff -r 52ec2b63b870 -r 0106f9f2f867 source/common/x86/h-<wbr>ipfilter16.asm<br>
--- a/source/common/x86/h-<wbr>ipfilter16.asm Fri Apr 20 14:16:17 2018 +0530<br>
+++ b/source/common/x86/h-<wbr>ipfilter16.asm Mon Apr 23 14:16:54 2018 +0530<br>
@@ -2133,12 +2133,43 @@<br>
IPFILTER_LUMA_PS_4xN_AVX2 8<br>
IPFILTER_LUMA_PS_4xN_AVX2 16<br>
<br>
+ %macro PROCESS_IPFILTER_LUMA_PS_8x1_<wbr>AVX2 1<br>
+<br>
+ %assign x 0<br>
+ %rep %1/8<br>
+ vbroadcasti128 m4, [r0 + x]<br>
+ vbroadcasti128 m5, [r0 + 8+ x]<br>
+ pshufb m4, m3<br>
+ pshufb m7, m5, m3<br>
+ pmaddwd m4, m0<br>
+ pmaddwd m7, m1<br>
+ paddd m4, m7<br>
+<br>
+ vbroadcasti128 m6, [r0 + 16 + x]<br>
+ pshufb m5, m3<br>
+ pshufb m6, m3<br>
+ pmaddwd m5, m0<br>
+ pmaddwd m6, m1<br>
+ paddd m5, m6<br>
+<br>
+ phaddd m4, m5<br>
+ vpermq m4, m4, q3120<br>
+ paddd m4, m2<br>
+ vextracti128 xm5,m4, 1<br>
+ psrad xm4, INTERP_SHIFT_PS<br>
+ psrad xm5, INTERP_SHIFT_PS<br>
+ packssdw xm4, xm5<br>
+ movu [r2 + x], xm4<br>
+ %assign x x+16<br>
+ %endrep<br>
+ %endmacro<br>
+<br>
%macro IPFILTER_LUMA_PS_8xN_AVX2 1<br>
INIT_YMM avx2<br>
%if ARCH_X86_64 == 1<br>
cglobal interp_8tap_horiz_ps_8x%1, 4, 6, 8<br>
- add r1d, r1d<br>
- add r3d, r3d<br>
+ shl r1d, 1<br>
+ shl r3d, 1<br>
mov r4d, r4m<br>
mov r5d, r5m<br>
shl r4d, 4<br>
@@ -2165,30 +2196,7 @@<br>
add r4d, 7<br>
<br>
.loop0:<br>
- vbroadcasti128 m4, [r0]<br>
- vbroadcasti128 m5, [r0 + 8]<br>
- pshufb m4, m3<br>
- pshufb m7, m5, m3<br>
- pmaddwd m4, m0<br>
- pmaddwd m7, m1<br>
- paddd m4, m7<br>
-<br>
- vbroadcasti128 m6, [r0 + 16]<br>
- pshufb m5, m3<br>
- pshufb m6, m3<br>
- pmaddwd m5, m0<br>
- pmaddwd m6, m1<br>
- paddd m5, m6<br>
-<br>
- phaddd m4, m5<br>
- vpermq m4, m4, q3120<br>
- paddd m4, m2<br>
- vextracti128 xm5,m4, 1<br>
- psrad xm4, INTERP_SHIFT_PS<br>
- psrad xm5, INTERP_SHIFT_PS<br>
- packssdw xm4, xm5<br>
-<br>
- movu [r2], xm4<br>
+ PROCESS_IPFILTER_LUMA_PS_8x1_<wbr>AVX2 8<br>
add r2, r3<br>
add r0, r1<br>
dec r4d<br>
@@ -2232,36 +2240,9 @@<br>
sub r0, r6<br>
add r4d, 7<br>
<br>
+<br>
.loop0:<br>
-%assign x 0<br>
-%rep 24/8<br>
- vbroadcasti128 m4, [r0 + x]<br>
- vbroadcasti128 m5, [r0 + 8 + x]<br>
- pshufb m4, m3<br>
- pshufb m7, m5, m3<br>
- pmaddwd m4, m0<br>
- pmaddwd m7, m1<br>
- paddd m4, m7<br>
-<br>
- vbroadcasti128 m6, [r0 + 16 + x]<br>
- pshufb m5, m3<br>
- pshufb m6, m3<br>
- pmaddwd m5, m0<br>
- pmaddwd m6, m1<br>
- paddd m5, m6<br>
-<br>
- phaddd m4, m5<br>
- vpermq m4, m4, q3120<br>
- paddd m4, m2<br>
- vextracti128 xm5,m4, 1<br>
- psrad xm4, INTERP_SHIFT_PS<br>
- psrad xm5, INTERP_SHIFT_PS<br>
- packssdw xm4, xm5<br>
-<br>
- movu [r2 + x], xm4<br>
- %assign x x+16<br>
- %endrep<br>
-<br>
+ PROCESS_IPFILTER_LUMA_PS_8x1_<wbr>AVX2 24<br>
add r2, r3<br>
add r0, r1<br>
dec r4d<br>
<br>______________________________<wbr>_________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/<wbr>listinfo/x265-devel</a><br>
<br></blockquote></div><br></div><div class="gmail_extra">Pushed this patch series.</div></div>