<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Tue, Nov 12, 2013 at 2:17 AM, Min Chen <span dir="ltr">&lt;<a href="mailto:chenm003@163.com" target="_blank">chenm003@163.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Min Chen &lt;<a href="mailto:chenm003@163.com">chenm003@163.com</a>&gt;<br>
# Date 1384244066 -28800<br>
# Node ID 9dda3a715f9fe089ee7b1e4db2ffeff28cd477c3<br>
# Parent 8c731f8c71ff6f42718a80934433a154417caeec<br>
asm: assembly code for x265_pixel_avg_12x16<br></blockquote><div><br></div><div>queued with some manual patch merging</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp Tue Nov 12 16:14:09 2013 +0800<br>
+++ b/source/common/x86/asm-primitives.cpp Tue Nov 12 16:14:26 2013 +0800<br>
@@ -125,6 +125,7 @@<br>
p.pixelavg_pp[LUMA_16x12] = x265_pixel_avg_16x12_ ## cpu; \<br>
p.pixelavg_pp[LUMA_16x8] = x265_pixel_avg_16x8_ ## cpu; \<br>
p.pixelavg_pp[LUMA_16x4] = x265_pixel_avg_16x4_ ## cpu; \<br>
+ p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \<br>
p.pixelavg_pp[LUMA_8x16] = x265_pixel_avg_8x16_ ## cpu; \<br>
p.pixelavg_pp[LUMA_8x8] = x265_pixel_avg_8x8_ ## cpu; \<br>
p.pixelavg_pp[LUMA_8x4] = x265_pixel_avg_8x4_ ## cpu;<br>
diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/mc-a.asm<br>
--- a/source/common/x86/mc-a.asm Tue Nov 12 16:14:09 2013 +0800<br>
+++ b/source/common/x86/mc-a.asm Tue Nov 12 16:14:26 2013 +0800<br>
@@ -190,7 +190,13 @@<br>
SWAP 0, 6<br>
BIWEIGHT [%2+mmsize/2], [%3+mmsize/2]<br>
packuswb m6, m0<br>
- mova [%1], m6<br>
+%if %4 != 12<br>
+ mova [%1], m6<br>
+%else ; !w12<br>
+ movh [%1], m6<br>
+ movhlps m6, m6<br>
+ movd [%1+mmsize/2], m6<br>
+%endif ; w12<br>
%endif<br>
%endmacro<br>
<br>
@@ -222,8 +228,12 @@<br>
%else<br>
%assign x 0<br>
%rep (%1*SIZEOF_PIXEL+mmsize-1)/mmsize<br>
- BIWEIGHT_ROW t0+x, t2+x, t4+x, %1<br>
- BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, t4+x+SIZEOF_PIXEL*t5, %1<br>
+%assign y mmsize<br>
%if (%1 == 12) &amp;&amp; (%1*SIZEOF_PIXEL-x &lt; mmsize)<br>
+%assign y (%1*SIZEOF_PIXEL-x)<br>
+%endif<br>
+ BIWEIGHT_ROW t0+x, t2+x, t4+x, y<br>
+ BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, t4+x+SIZEOF_PIXEL*t5, y<br>
%assign x x+mmsize<br>
%endrep<br>
%endif<br>
@@ -235,6 +245,7 @@<br>
INIT_MMX mmx2<br>
AVG_WEIGHT 4<br>
AVG_WEIGHT 8<br>
+AVG_WEIGHT 12<br>
AVG_WEIGHT 16<br>
AVG_WEIGHT 32<br>
%if HIGH_BIT_DEPTH<br>
@@ -245,6 +256,7 @@<br>
%else ;!HIGH_BIT_DEPTH<br>
INIT_XMM sse2<br>
AVG_WEIGHT 8, 7<br>
+AVG_WEIGHT 12, 7<br>
AVG_WEIGHT 16, 7<br>
AVG_WEIGHT 32, 7<br>
%define BIWEIGHT BIWEIGHT_SSSE3<br>
@@ -253,6 +265,7 @@<br>
AVG_WEIGHT 4<br>
INIT_XMM ssse3<br>
AVG_WEIGHT 8, 7<br>
+AVG_WEIGHT 12, 7<br>
AVG_WEIGHT 16, 7<br>
AVG_WEIGHT 32, 7<br>
<br>
@@ -648,7 +661,7 @@<br>
; pixel *src2, intptr_t src2_stride, int height, int weight );<br>
;-----------------------------------------------------------------------------<br>
<br>
-%macro AVG_FUNC 3<br>
+%macro AVG_FUNC 3-4<br>
cglobal pixel_avg_w%1<br>
AVG_START<br>
.height_loop:<br>
@@ -663,8 +676,13 @@<br>
pavgb m0, [t4+x]<br>
pavgb m1, [t4+x+SIZEOF_PIXEL*t5]<br>
%endif<br>
%if (%1 == 12) &amp;&amp; (%1-x/SIZEOF_PIXEL &lt; mmsize)<br>
+ %4 [t0+x], m0<br>
+ %4 [t0+x+SIZEOF_PIXEL*t1], m1<br>
+%else<br>
%3 [t0+x], m0<br>
%3 [t0+x+SIZEOF_PIXEL*t1], m1<br>
+%endif<br>
%assign x x+mmsize<br>
%endrep<br>
AVG_END<br>
@@ -718,6 +736,9 @@<br>
AVGH 8, 8<br>
AVGH 8, 4<br>
<br>
+AVG_FUNC 12, movq, movq, movd<br>
+AVGH 12, 16<br>
+<br>
AVG_FUNC 16, movq, movq<br>
AVGH 16, 64<br>
AVGH 16, 32<br>
@@ -739,6 +760,8 @@<br>
AVGH 16, 12<br>
AVGH 16, 8<br>
AVGH 16, 4<br>
+AVG_FUNC 12, movdqu, movdqa, movq<br>
+AVGH 12, 16<br>
AVGH 8, 16<br>
AVGH 8, 8<br>
AVGH 8, 4<br>
@@ -750,6 +773,7 @@<br>
AVGH 16, 12<br>
AVGH 16, 8<br>
AVGH 16, 4<br>
+AVGH 12, 16<br>
AVGH 8, 16<br>
AVGH 8, 8<br>
AVGH 8, 4<br>
diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/pixel.h<br>
--- a/source/common/x86/pixel.h Tue Nov 12 16:14:09 2013 +0800<br>
+++ b/source/common/x86/pixel.h Tue Nov 12 16:14:26 2013 +0800<br>
@@ -245,6 +245,7 @@<br>
DECL_SUF(x265_pixel_avg_16x12, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
DECL_SUF(x265_pixel_avg_16x8, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
DECL_SUF(x265_pixel_avg_16x4, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
+DECL_SUF(x265_pixel_avg_12x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
DECL_SUF(x265_pixel_avg_8x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
DECL_SUF(x265_pixel_avg_8x8, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
DECL_SUF(x265_pixel_avg_8x4, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank" rel="noopener noreferrer">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>