[x265] [PATCH 2 of 5] asm: avx2 code for add_ps for chroma sizes 16x16 and 32x32, reusing the code from luma

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Wed Mar 25 06:35:46 CET 2015


# HG changeset patch
# User Sumalatha Polureddy
# Date 1427260835 -19800
#      Wed Mar 25 10:50:35 2015 +0530
# Node ID f8fa1887edcec2008fb3d6e7bbd58b681c8f6cc5
# Parent  49d9c978601618ab2588b2a6f11eeeb59fb57450
asm: avx2 code for add_ps for chroma sizes 16x16 and 32x32, reusing the code from luma

sse3
[i420]  add_ps[16x16]  17.39x   625.09          10867.35
[i420]  add_ps[32x32]  21.70x   1978.74         42930.85

avx2
[i420]  add_ps[16x16]  21.19x   482.93          10234.38
[i420]  add_ps[32x32]  29.58x   1442.61         42678.27

diff -r 49d9c9786016 -r f8fa1887edce source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Mar 25 10:49:21 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Mar 25 10:50:35 2015 +0530
@@ -1484,6 +1484,8 @@
         p.cu[BLOCK_16x16].add_ps = x265_pixel_add_ps_16x16_avx2;
         p.cu[BLOCK_32x32].add_ps = x265_pixel_add_ps_32x32_avx2;
         p.cu[BLOCK_64x64].add_ps = x265_pixel_add_ps_64x64_avx2;
+        p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].add_ps = x265_pixel_add_ps_16x16_avx2;
+        p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].add_ps = x265_pixel_add_ps_32x32_avx2;
 
         p.cu[BLOCK_16x16].sub_ps = x265_pixel_sub_ps_16x16_avx2;
         p.cu[BLOCK_32x32].sub_ps = x265_pixel_sub_ps_32x32_avx2;


More information about the x265-devel mailing list