[x265] [PATCH 2 of 2] asm: assembly code for x265_pixel_avg_12x16

Min Chen chenm003 at 163.com
Tue Nov 12 09:17:28 CET 2013


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1384244066 -28800
# Node ID 9dda3a715f9fe089ee7b1e4db2ffeff28cd477c3
# Parent  8c731f8c71ff6f42718a80934433a154417caeec
asm: assembly code for x265_pixel_avg_12x16

diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Nov 12 16:14:09 2013 +0800
+++ b/source/common/x86/asm-primitives.cpp	Tue Nov 12 16:14:26 2013 +0800
@@ -125,6 +125,7 @@
     p.pixelavg_pp[LUMA_16x12]  = x265_pixel_avg_16x12_ ## cpu; \
     p.pixelavg_pp[LUMA_16x8]  = x265_pixel_avg_16x8_ ## cpu; \
     p.pixelavg_pp[LUMA_16x4]  = x265_pixel_avg_16x4_ ## cpu; \
+    p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \
     p.pixelavg_pp[LUMA_8x16]  = x265_pixel_avg_8x16_ ## cpu; \
     p.pixelavg_pp[LUMA_8x8]   = x265_pixel_avg_8x8_ ## cpu; \
     p.pixelavg_pp[LUMA_8x4]   = x265_pixel_avg_8x4_ ## cpu;
diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm	Tue Nov 12 16:14:09 2013 +0800
+++ b/source/common/x86/mc-a.asm	Tue Nov 12 16:14:26 2013 +0800
@@ -190,7 +190,13 @@
     SWAP 0, 6
     BIWEIGHT [%2+mmsize/2], [%3+mmsize/2]
     packuswb   m6, m0
-    mova     [%1], m6
+%if %4 != 12
+    mova    [%1], m6
+%else ; w12
+    movh    [%1], m6
+    movhlps m6, m6
+    movd    [%1+mmsize/2], m6
+%endif ; w12
 %endif
 %endmacro
 
@@ -222,8 +228,12 @@
 %else
 %assign x 0
 %rep (%1*SIZEOF_PIXEL+mmsize-1)/mmsize
-    BIWEIGHT_ROW   t0+x,                   t2+x,                   t4+x,                 %1
-    BIWEIGHT_ROW   t0+x+SIZEOF_PIXEL*t1,   t2+x+SIZEOF_PIXEL*t3,   t4+x+SIZEOF_PIXEL*t5, %1
+%assign y mmsize
+%if (%1 == 12) && (%1*SIZEOF_PIXEL-x < mmsize)
+%assign y (%1*SIZEOF_PIXEL-x)
+%endif
+    BIWEIGHT_ROW   t0+x,                   t2+x,                   t4+x,                 y
+    BIWEIGHT_ROW   t0+x+SIZEOF_PIXEL*t1,   t2+x+SIZEOF_PIXEL*t3,   t4+x+SIZEOF_PIXEL*t5, y
 %assign x x+mmsize
 %endrep
 %endif
@@ -235,6 +245,7 @@
 INIT_MMX mmx2
 AVG_WEIGHT 4
 AVG_WEIGHT 8
+AVG_WEIGHT 12
 AVG_WEIGHT 16
 AVG_WEIGHT 32
 %if HIGH_BIT_DEPTH
@@ -245,6 +256,7 @@
 %else ;!HIGH_BIT_DEPTH
 INIT_XMM sse2
 AVG_WEIGHT 8,  7
+AVG_WEIGHT 12, 7
 AVG_WEIGHT 16, 7
 AVG_WEIGHT 32, 7
 %define BIWEIGHT BIWEIGHT_SSSE3
@@ -253,6 +265,7 @@
 AVG_WEIGHT 4
 INIT_XMM ssse3
 AVG_WEIGHT 8,  7
+AVG_WEIGHT 12, 7
 AVG_WEIGHT 16, 7
 AVG_WEIGHT 32, 7
 
@@ -648,7 +661,7 @@
 ;                    pixel *src2, intptr_t src2_stride, int height, int weight );
 ;-----------------------------------------------------------------------------
 
-%macro AVG_FUNC 3
+%macro AVG_FUNC 3-4
 cglobal pixel_avg_w%1
     AVG_START
 .height_loop:
@@ -663,8 +676,13 @@
     pavgb  m0, [t4+x]
     pavgb  m1, [t4+x+SIZEOF_PIXEL*t5]
 %endif
+%if (%1 == 12) && (%1-x/SIZEOF_PIXEL < mmsize)
+    %4     [t0+x], m0
+    %4     [t0+x+SIZEOF_PIXEL*t1], m1
+%else
     %3     [t0+x], m0
     %3     [t0+x+SIZEOF_PIXEL*t1], m1
+%endif
 %assign x x+mmsize
 %endrep
     AVG_END
@@ -718,6 +736,9 @@
 AVGH 8,  8
 AVGH 8,  4
 
+AVG_FUNC 12, movq, movq, movd
+AVGH 12, 16
+
 AVG_FUNC 16, movq, movq
 AVGH 16, 64
 AVGH 16, 32
@@ -739,6 +760,8 @@
 AVGH 16, 12
 AVGH 16, 8
 AVGH 16, 4
+AVG_FUNC 12, movdqu, movdqa, movq
+AVGH 12, 16
 AVGH  8, 16
 AVGH  8,  8
 AVGH  8,  4
@@ -750,6 +773,7 @@
 AVGH 16, 12
 AVGH 16, 8
 AVGH 16, 4
+AVGH 12, 16
 AVGH  8, 16
 AVGH  8,  8
 AVGH  8,  4
diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Tue Nov 12 16:14:09 2013 +0800
+++ b/source/common/x86/pixel.h	Tue Nov 12 16:14:26 2013 +0800
@@ -245,6 +245,7 @@
 DECL_SUF(x265_pixel_avg_16x12, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
 DECL_SUF(x265_pixel_avg_16x8,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
 DECL_SUF(x265_pixel_avg_16x4,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
+DECL_SUF(x265_pixel_avg_12x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
 DECL_SUF(x265_pixel_avg_8x16,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
 DECL_SUF(x265_pixel_avg_8x8,   (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
 DECL_SUF(x265_pixel_avg_8x4,   (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))



More information about the x265-devel mailing list