[x265] [PATCH 2 of 2] asm: assembly code for x265_pixel_avg_12x16
Min Chen
chenm003 at 163.com
Tue Nov 12 12:27:15 CET 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1384255626 -28800
# Node ID d0fb42b3fa396e413dc510bd9cf7eb2a8da22f31
# Parent 0d8ed55eb94d7cf43afb687edbad7f3db349b84c
asm: assembly code for x265_pixel_avg_12x16
diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Nov 12 19:26:48 2013 +0800
+++ b/source/common/x86/asm-primitives.cpp Tue Nov 12 19:27:06 2013 +0800
@@ -134,6 +134,7 @@
p.pixelavg_pp[LUMA_16x12] = x265_pixel_avg_16x12_ ## cpu; \
p.pixelavg_pp[LUMA_16x8] = x265_pixel_avg_16x8_ ## cpu; \
p.pixelavg_pp[LUMA_16x4] = x265_pixel_avg_16x4_ ## cpu; \
+ p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \
p.pixelavg_pp[LUMA_8x32] = x265_pixel_avg_8x32_ ## cpu; \
p.pixelavg_pp[LUMA_8x16] = x265_pixel_avg_8x16_ ## cpu; \
p.pixelavg_pp[LUMA_8x8] = x265_pixel_avg_8x8_ ## cpu; \
diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm Tue Nov 12 19:26:48 2013 +0800
+++ b/source/common/x86/mc-a.asm Tue Nov 12 19:27:06 2013 +0800
@@ -190,7 +190,13 @@
SWAP 0, 6
BIWEIGHT [%2+mmsize/2], [%3+mmsize/2]
packuswb m6, m0
- mova [%1], m6
+%if %4 != 12
+ mova [%1], m6
+%else ; w12
+ movh [%1], m6
+ movhlps m6, m6
+ movd [%1+mmsize/2], m6
+%endif ; w12
%endif
%endmacro
@@ -222,8 +228,12 @@
%else
%assign x 0
%rep (%1*SIZEOF_PIXEL+mmsize-1)/mmsize
- BIWEIGHT_ROW t0+x, t2+x, t4+x, %1
- BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, t4+x+SIZEOF_PIXEL*t5, %1
+%assign y mmsize
+%if (%1 == 12) && (%1*SIZEOF_PIXEL-x < mmsize)
+%assign y (%1*SIZEOF_PIXEL-x)
+%endif
+ BIWEIGHT_ROW t0+x, t2+x, t4+x, y
+ BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, t4+x+SIZEOF_PIXEL*t5, y
%assign x x+mmsize
%endrep
%endif
@@ -235,6 +245,7 @@
INIT_MMX mmx2
AVG_WEIGHT 4
AVG_WEIGHT 8
+AVG_WEIGHT 12
AVG_WEIGHT 16
AVG_WEIGHT 32
AVG_WEIGHT 64
@@ -248,6 +259,7 @@
%else ;!HIGH_BIT_DEPTH
INIT_XMM sse2
AVG_WEIGHT 8, 7
+AVG_WEIGHT 12, 7
AVG_WEIGHT 16, 7
AVG_WEIGHT 32, 7
AVG_WEIGHT 64, 7
@@ -259,6 +271,7 @@
AVG_WEIGHT 4
INIT_XMM ssse3
AVG_WEIGHT 8, 7
+AVG_WEIGHT 12, 7
AVG_WEIGHT 16, 7
AVG_WEIGHT 32, 7
AVG_WEIGHT 64, 7
@@ -657,7 +670,7 @@
; pixel *src2, intptr_t src2_stride, int height, int weight );
;-----------------------------------------------------------------------------
-%macro AVG_FUNC 3
+%macro AVG_FUNC 3-4
cglobal pixel_avg_w%1
AVG_START
.height_loop:
@@ -672,8 +685,13 @@
pavgb m0, [t4+x]
pavgb m1, [t4+x+SIZEOF_PIXEL*t5]
%endif
+%if (%1 == 12) && (%1-x/SIZEOF_PIXEL < mmsize)
+ %4 [t0+x], m0
+ %4 [t0+x+SIZEOF_PIXEL*t1], m1
+%else
%3 [t0+x], m0
%3 [t0+x+SIZEOF_PIXEL*t1], m1
+%endif
%assign x x+mmsize
%endrep
AVG_END
@@ -728,6 +746,9 @@
AVGH 8, 8
AVGH 8, 4
+AVG_FUNC 12, movq, movq, movd
+AVGH 12, 16
+
AVG_FUNC 16, movq, movq
AVGH 16, 64
AVGH 16, 32
@@ -780,6 +801,9 @@
AVG_FUNC 48, movdqu, movdqa
AVGH 48, 64
+AVG_FUNC 12, movdqu, movdqa, movq
+AVGH 12, 16
+
AVGH 8, 32
AVGH 8, 16
AVGH 8, 8
@@ -806,6 +830,8 @@
AVGH 48, 64
+AVGH 12, 16
+
AVGH 8, 32
AVGH 8, 16
AVGH 8, 8
diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Tue Nov 12 19:26:48 2013 +0800
+++ b/source/common/x86/pixel.h Tue Nov 12 19:27:06 2013 +0800
@@ -254,6 +254,7 @@
DECL_SUF(x265_pixel_avg_16x12, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
DECL_SUF(x265_pixel_avg_16x8, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
DECL_SUF(x265_pixel_avg_16x4, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
+DECL_SUF(x265_pixel_avg_12x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
DECL_SUF(x265_pixel_avg_8x32, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
DECL_SUF(x265_pixel_avg_8x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
DECL_SUF(x265_pixel_avg_8x8, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
More information about the x265-devel
mailing list