[x265] [PATCH] asm: pixel_avg[32x16]

Min Chen chenm003 at 163.com
Mon Nov 11 13:52:06 CET 2013


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1384174318 -28800
# Node ID b9f74f0f49b3020eac49edd87c4f20778e466fe5
# Parent  9642b5b6500b5553ab3ce70a360aaaadad5d7234
asm: pixel_avg[32x16]

diff -r 9642b5b6500b -r b9f74f0f49b3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Nov 11 17:41:32 2013 +0800
+++ b/source/common/x86/asm-primitives.cpp	Mon Nov 11 20:51:58 2013 +0800
@@ -120,6 +120,7 @@
     p.sa8d_inter[LUMA_16x64] = cmp < 16, 64, 16, 16, x265_pixel_sa8d_16x16_ ## cpu >
 
 #define PIXEL_AVG(cpu) \
+    p.pixelavg_pp[LUMA_32x16] = x265_pixel_avg_32x16_ ## cpu; \
     p.pixelavg_pp[LUMA_16x16] = x265_pixel_avg_16x16_ ## cpu; \
     p.pixelavg_pp[LUMA_16x8]  = x265_pixel_avg_16x8_ ## cpu; \
     p.pixelavg_pp[LUMA_8x16]  = x265_pixel_avg_8x16_ ## cpu; \
diff -r 9642b5b6500b -r b9f74f0f49b3 source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm	Mon Nov 11 17:41:32 2013 +0800
+++ b/source/common/x86/mc-a.asm	Mon Nov 11 20:51:58 2013 +0800
@@ -236,6 +236,7 @@
 AVG_WEIGHT 4
 AVG_WEIGHT 8
 AVG_WEIGHT 16
+AVG_WEIGHT 32
 %if HIGH_BIT_DEPTH
 INIT_XMM sse2
 AVG_WEIGHT 4,  8
@@ -245,6 +246,7 @@
 INIT_XMM sse2
 AVG_WEIGHT 8,  7
 AVG_WEIGHT 16, 7
+AVG_WEIGHT 32, 7
 %define BIWEIGHT BIWEIGHT_SSSE3
 %define BIWEIGHT_START BIWEIGHT_START_SSSE3
 INIT_MMX ssse3
@@ -252,6 +254,7 @@
 INIT_XMM ssse3
 AVG_WEIGHT 8,  7
 AVG_WEIGHT 16, 7
+AVG_WEIGHT 32, 7
 
 INIT_YMM avx2
 cglobal pixel_avg_weight_w16
@@ -632,7 +635,7 @@
 %if cpuflag(avx2) && %1 == 16 ; all AVX2 machines can do fast 16-byte unaligned loads
     jmp pixel_avg_w%1_avx2
 %else
-%if mmsize == 16 && %1 == 16
+%if mmsize == 16 && (%1 % 16 == 0)
     test dword r4m, 15
     jz pixel_avg_w%1_sse2
 %endif
@@ -719,7 +722,12 @@
 AVGH 16, 16
 AVGH 16, 8
 
+AVG_FUNC 32, movq, movq
+AVGH 32, 16
+
 INIT_XMM sse2
+AVG_FUNC 32, movdqu, movdqa
+AVGH 32, 16
 AVG_FUNC 16, movdqu, movdqa
 AVGH 16, 16
 AVGH 16,  8
@@ -727,6 +735,7 @@
 AVGH  8,  8
 AVGH  8,  4
 INIT_XMM ssse3
+AVGH 32, 16
 AVGH 16, 16
 AVGH 16,  8
 AVGH  8, 16
@@ -738,6 +747,9 @@
 AVGH  4,  4
 AVGH  4,  2
 INIT_XMM avx2
+; TODO: activate the AVX2 32x16 version after debugging
+;AVG_FUNC 32, movdqu, movdqa
+;AVGH 32, 16
 AVG_FUNC 16, movdqu, movdqa
 AVGH 16, 16
 AVGH 16,  8
diff -r 9642b5b6500b -r b9f74f0f49b3 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Mon Nov 11 17:41:32 2013 +0800
+++ b/source/common/x86/pixel.h	Mon Nov 11 20:51:58 2013 +0800
@@ -238,6 +238,7 @@
     void func ## _mmx2 args; \
     void func ## _sse2 args; \
     void func ## _ssse3 args;
+DECL_SUF(x265_pixel_avg_32x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
 DECL_SUF(x265_pixel_avg_16x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
 DECL_SUF(x265_pixel_avg_16x8,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
 DECL_SUF(x265_pixel_avg_8x16,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))



More information about the x265-devel mailing list