[x265] [PATCH] asm: avx2 code for chroma add_ps, reused luma code

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Wed Apr 15 13:39:54 CEST 2015


# HG changeset patch
# User Sumalatha Polureddy
# Date 1429095027 -19800
#      Wed Apr 15 16:20:27 2015 +0530
# Node ID 7133956fee5836bb82f683d3a3fe9b5a612e34f9
# Parent  737edf5ac0088867cbd9d6d0d85958d594ee6c05
asm: avx2 code for chroma add_ps, reused luma code

sse4 (columns: speedup = reference time / optimized time, optimized time, reference time)
[i422]  add_ps[32x64]  20.56x   3937.07         80931.80
[i422]  add_ps[16x32]  18.12x   1184.91         21470.56

avx2
[i422]  add_ps[32x64]  25.79x   2764.54         71294.41
[i422]  add_ps[16x32]  17.17x   1063.50         18259.36

diff -r 737edf5ac008 -r 7133956fee58 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Apr 15 10:58:54 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Apr 15 16:20:27 2015 +0530
@@ -1664,6 +1664,8 @@
         p.cu[BLOCK_64x64].add_ps = x265_pixel_add_ps_64x64_avx2;
         p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].add_ps = x265_pixel_add_ps_16x16_avx2;
         p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].add_ps = x265_pixel_add_ps_32x32_avx2;
+        p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].add_ps = x265_pixel_add_ps_16x32_avx2;
+        p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].add_ps = x265_pixel_add_ps_32x64_avx2;
 
         p.cu[BLOCK_16x16].sub_ps = x265_pixel_sub_ps_16x16_avx2;
         p.cu[BLOCK_32x32].sub_ps = x265_pixel_sub_ps_32x32_avx2;
diff -r 737edf5ac008 -r 7133956fee58 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Wed Apr 15 10:58:54 2015 +0530
+++ b/source/common/x86/pixel.h	Wed Apr 15 16:20:27 2015 +0530
@@ -257,6 +257,8 @@
 void x265_pixel_add_ps_16x16_avx2(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
 void x265_pixel_add_ps_32x32_avx2(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
 void x265_pixel_add_ps_64x64_avx2(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
+void x265_pixel_add_ps_16x32_avx2(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
+void x265_pixel_add_ps_32x64_avx2(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
 
 void x265_pixel_sub_ps_16x16_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
 void x265_pixel_sub_ps_32x32_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);


More information about the x265-devel mailing list