<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Tue, Nov 12, 2013 at 5:27 AM, Min Chen <span dir="ltr">&lt;<a href="mailto:chenm003@163.com" target="_blank">chenm003@163.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Min Chen &lt;<a href="mailto:chenm003@163.com">chenm003@163.com</a>&gt;<br>
# Date 1384255626 -28800<br>
# Node ID d0fb42b3fa396e413dc510bd9cf7eb2a8da22f31<br>
# Parent 0d8ed55eb94d7cf43afb687edbad7f3db349b84c<br>
asm: assembly code for x265_pixel_avg_12x16<br></blockquote><div><br></div><div>oh; taking this one instead of the one I fixed up</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp Tue Nov 12 19:26:48 2013 +0800<br>
+++ b/source/common/x86/asm-primitives.cpp Tue Nov 12 19:27:06 2013 +0800<br>
@@ -134,6 +134,7 @@<br>
p.pixelavg_pp[LUMA_16x12] = x265_pixel_avg_16x12_ ## cpu; \<br>
p.pixelavg_pp[LUMA_16x8] = x265_pixel_avg_16x8_ ## cpu; \<br>
p.pixelavg_pp[LUMA_16x4] = x265_pixel_avg_16x4_ ## cpu; \<br>
+ p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \<br>
p.pixelavg_pp[LUMA_8x32] = x265_pixel_avg_8x32_ ## cpu; \<br>
p.pixelavg_pp[LUMA_8x16] = x265_pixel_avg_8x16_ ## cpu; \<br>
p.pixelavg_pp[LUMA_8x8] = x265_pixel_avg_8x8_ ## cpu; \<br>
diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/mc-a.asm<br>
--- a/source/common/x86/mc-a.asm Tue Nov 12 19:26:48 2013 +0800<br>
+++ b/source/common/x86/mc-a.asm Tue Nov 12 19:27:06 2013 +0800<br>
@@ -190,7 +190,13 @@<br>
SWAP 0, 6<br>
BIWEIGHT [%2+mmsize/2], [%3+mmsize/2]<br>
packuswb m6, m0<br>
- mova [%1], m6<br>
+%if %4 != 12<br>
+ mova [%1], m6<br>
+%else ; !w12<br>
+ movh [%1], m6<br>
+ movhlps m6, m6<br>
+ movd [%1+mmsize/2], m6<br>
+%endif ; w12<br>
%endif<br>
%endmacro<br>
<br>
@@ -222,8 +228,12 @@<br>
%else<br>
%assign x 0<br>
%rep (%1*SIZEOF_PIXEL+mmsize-1)/mmsize<br>
- BIWEIGHT_ROW t0+x, t2+x, t4+x, %1<br>
- BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, t4+x+SIZEOF_PIXEL*t5, %1<br>
+%assign y mmsize<br>
%if (%1 == 12) && (%1*SIZEOF_PIXEL-x &lt; mmsize)<br>
+%assign y (%1*SIZEOF_PIXEL-x)<br>
+%endif<br>
+ BIWEIGHT_ROW t0+x, t2+x, t4+x, y<br>
+ BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, t4+x+SIZEOF_PIXEL*t5, y<br>
%assign x x+mmsize<br>
%endrep<br>
%endif<br>
@@ -235,6 +245,7 @@<br>
INIT_MMX mmx2<br>
AVG_WEIGHT 4<br>
AVG_WEIGHT 8<br>
+AVG_WEIGHT 12<br>
AVG_WEIGHT 16<br>
AVG_WEIGHT 32<br>
AVG_WEIGHT 64<br>
@@ -248,6 +259,7 @@<br>
%else ;!HIGH_BIT_DEPTH<br>
INIT_XMM sse2<br>
AVG_WEIGHT 8, 7<br>
+AVG_WEIGHT 12, 7<br>
AVG_WEIGHT 16, 7<br>
AVG_WEIGHT 32, 7<br>
AVG_WEIGHT 64, 7<br>
@@ -259,6 +271,7 @@<br>
AVG_WEIGHT 4<br>
INIT_XMM ssse3<br>
AVG_WEIGHT 8, 7<br>
+AVG_WEIGHT 12, 7<br>
AVG_WEIGHT 16, 7<br>
AVG_WEIGHT 32, 7<br>
AVG_WEIGHT 64, 7<br>
@@ -657,7 +670,7 @@<br>
; pixel *src2, intptr_t src2_stride, int height, int weight );<br>
;-----------------------------------------------------------------------------<br>
<br>
-%macro AVG_FUNC 3<br>
+%macro AVG_FUNC 3-4<br>
cglobal pixel_avg_w%1<br>
AVG_START<br>
.height_loop:<br>
@@ -672,8 +685,13 @@<br>
pavgb m0, [t4+x]<br>
pavgb m1, [t4+x+SIZEOF_PIXEL*t5]<br>
%endif<br>
+%if (%1 == 12) && (%1-x/SIZEOF_PIXEL &lt; mmsize)<br>
+ %4 [t0+x], m0<br>
+ %4 [t0+x+SIZEOF_PIXEL*t1], m1<br>
+%else<br>
%3 [t0+x], m0<br>
%3 [t0+x+SIZEOF_PIXEL*t1], m1<br>
+%endif<br>
%assign x x+mmsize<br>
%endrep<br>
AVG_END<br>
@@ -728,6 +746,9 @@<br>
AVGH 8, 8<br>
AVGH 8, 4<br>
<br>
+AVG_FUNC 12, movq, movq, movd<br>
+AVGH 12, 16<br>
+<br>
AVG_FUNC 16, movq, movq<br>
AVGH 16, 64<br>
AVGH 16, 32<br>
@@ -780,6 +801,9 @@<br>
AVG_FUNC 48, movdqu, movdqa<br>
AVGH 48, 64<br>
<br>
+AVG_FUNC 12, movdqu, movdqa, movq<br>
+AVGH 12, 16<br>
+<br>
AVGH 8, 32<br>
AVGH 8, 16<br>
AVGH 8, 8<br>
@@ -806,6 +830,8 @@<br>
<br>
AVGH 48, 64<br>
<br>
+AVGH 12, 16<br>
+<br>
AVGH 8, 32<br>
AVGH 8, 16<br>
AVGH 8, 8<br>
diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/pixel.h<br>
--- a/source/common/x86/pixel.h Tue Nov 12 19:26:48 2013 +0800<br>
+++ b/source/common/x86/pixel.h Tue Nov 12 19:27:06 2013 +0800<br>
@@ -254,6 +254,7 @@<br>
DECL_SUF(x265_pixel_avg_16x12, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
DECL_SUF(x265_pixel_avg_16x8, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
DECL_SUF(x265_pixel_avg_16x4, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
+DECL_SUF(x265_pixel_avg_12x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
DECL_SUF(x265_pixel_avg_8x32, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
DECL_SUF(x265_pixel_avg_8x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
DECL_SUF(x265_pixel_avg_8x8, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))<br>
<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>