[x265] [PATCH] asm: pixel_avg[32x16]
Min Chen
chenm003 at 163.com
Mon Nov 11 13:52:06 CET 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1384174318 -28800
# Node ID b9f74f0f49b3020eac49edd87c4f20778e466fe5
# Parent 9642b5b6500b5553ab3ce70a360aaaadad5d7234
asm: pixel_avg[32x16]
diff -r 9642b5b6500b -r b9f74f0f49b3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Nov 11 17:41:32 2013 +0800
+++ b/source/common/x86/asm-primitives.cpp Mon Nov 11 20:51:58 2013 +0800
@@ -120,6 +120,7 @@
p.sa8d_inter[LUMA_16x64] = cmp < 16, 64, 16, 16, x265_pixel_sa8d_16x16_ ## cpu >
#define PIXEL_AVG(cpu) \
+ p.pixelavg_pp[LUMA_32x16] = x265_pixel_avg_32x16_ ## cpu; \
p.pixelavg_pp[LUMA_16x16] = x265_pixel_avg_16x16_ ## cpu; \
p.pixelavg_pp[LUMA_16x8] = x265_pixel_avg_16x8_ ## cpu; \
p.pixelavg_pp[LUMA_8x16] = x265_pixel_avg_8x16_ ## cpu; \
diff -r 9642b5b6500b -r b9f74f0f49b3 source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm Mon Nov 11 17:41:32 2013 +0800
+++ b/source/common/x86/mc-a.asm Mon Nov 11 20:51:58 2013 +0800
@@ -236,6 +236,7 @@
AVG_WEIGHT 4
AVG_WEIGHT 8
AVG_WEIGHT 16
+AVG_WEIGHT 32
%if HIGH_BIT_DEPTH
INIT_XMM sse2
AVG_WEIGHT 4, 8
@@ -245,6 +246,7 @@
INIT_XMM sse2
AVG_WEIGHT 8, 7
AVG_WEIGHT 16, 7
+AVG_WEIGHT 32, 7
%define BIWEIGHT BIWEIGHT_SSSE3
%define BIWEIGHT_START BIWEIGHT_START_SSSE3
INIT_MMX ssse3
@@ -252,6 +254,7 @@
INIT_XMM ssse3
AVG_WEIGHT 8, 7
AVG_WEIGHT 16, 7
+AVG_WEIGHT 32, 7
INIT_YMM avx2
cglobal pixel_avg_weight_w16
@@ -632,7 +635,7 @@
%if cpuflag(avx2) && %1 == 16 ; all AVX2 machines can do fast 16-byte unaligned loads
jmp pixel_avg_w%1_avx2
%else
-%if mmsize == 16 && %1 == 16
+%if mmsize == 16 && (%1 % 16 == 0)
test dword r4m, 15
jz pixel_avg_w%1_sse2
%endif
@@ -719,7 +722,12 @@
AVGH 16, 16
AVGH 16, 8
+AVG_FUNC 32, movq, movq
+AVGH 32, 16
+
INIT_XMM sse2
+AVG_FUNC 32, movdqu, movdqa
+AVGH 32, 16
AVG_FUNC 16, movdqu, movdqa
AVGH 16, 16
AVGH 16, 8
@@ -727,6 +735,7 @@
AVGH 8, 8
AVGH 8, 4
INIT_XMM ssse3
+AVGH 32, 16
AVGH 16, 16
AVGH 16, 8
AVGH 8, 16
@@ -738,6 +747,9 @@
AVGH 4, 4
AVGH 4, 2
INIT_XMM avx2
+; TODO: enable the AVX2 versions below once they have been debugged
+;AVG_FUNC 32, movdqu, movdqa
+;AVGH 32, 16
AVG_FUNC 16, movdqu, movdqa
AVGH 16, 16
AVGH 16, 8
diff -r 9642b5b6500b -r b9f74f0f49b3 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Mon Nov 11 17:41:32 2013 +0800
+++ b/source/common/x86/pixel.h Mon Nov 11 20:51:58 2013 +0800
@@ -238,6 +238,7 @@
void func ## _mmx2 args; \
void func ## _sse2 args; \
void func ## _ssse3 args;
+DECL_SUF(x265_pixel_avg_32x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
DECL_SUF(x265_pixel_avg_16x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
DECL_SUF(x265_pixel_avg_16x8, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
DECL_SUF(x265_pixel_avg_8x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
More information about the x265-devel mailing list