[x265] [PATCH] asm: avx2 code for chroma add_ps, reused luma code
sumalatha at multicorewareinc.com
sumalatha at multicorewareinc.com
Wed Apr 15 13:39:54 CEST 2015
# HG changeset patch
# User Sumalatha Polureddy
# Date 1429095027 -19800
# Wed Apr 15 16:20:27 2015 +0530
# Node ID 7133956fee5836bb82f683d3a3fe9b5a612e34f9
# Parent 737edf5ac0088867cbd9d6d0d85958d594ee6c05
asm: avx2 code for chroma add_ps, reused luma code
sse4
[i422] add_ps[32x64] 20.56x 3937.07 80931.80
[i422] add_ps[16x32] 18.12x 1184.91 21470.56
avx2
[i422] add_ps[32x64] 25.79x 2764.54 71294.41
[i422] add_ps[16x32] 17.17x 1063.50 18259.36
diff -r 737edf5ac008 -r 7133956fee58 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Apr 15 10:58:54 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Apr 15 16:20:27 2015 +0530
@@ -1664,6 +1664,8 @@
p.cu[BLOCK_64x64].add_ps = x265_pixel_add_ps_64x64_avx2;
p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].add_ps = x265_pixel_add_ps_16x16_avx2;
p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].add_ps = x265_pixel_add_ps_32x32_avx2;
+ p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].add_ps = x265_pixel_add_ps_16x32_avx2;
+ p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].add_ps = x265_pixel_add_ps_32x64_avx2;
p.cu[BLOCK_16x16].sub_ps = x265_pixel_sub_ps_16x16_avx2;
p.cu[BLOCK_32x32].sub_ps = x265_pixel_sub_ps_32x32_avx2;
diff -r 737edf5ac008 -r 7133956fee58 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Wed Apr 15 10:58:54 2015 +0530
+++ b/source/common/x86/pixel.h Wed Apr 15 16:20:27 2015 +0530
@@ -257,6 +257,8 @@
void x265_pixel_add_ps_16x16_avx2(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
void x265_pixel_add_ps_32x32_avx2(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
void x265_pixel_add_ps_64x64_avx2(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
+void x265_pixel_add_ps_16x32_avx2(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
+void x265_pixel_add_ps_32x64_avx2(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
void x265_pixel_sub_ps_16x16_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
void x265_pixel_sub_ps_32x32_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
More information about the x265-devel mailing list