[x265] [PATCH 3 of 5] asm: avx2 code for sub_ps for chroma sizes 16x16, 32x32, reused the code from luma

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Wed Mar 25 06:35:47 CET 2015


# HG changeset patch
# User Sumalatha Polureddy
# Date 1427260898 -19800
#      Wed Mar 25 10:51:38 2015 +0530
# Node ID d5e09e076f09bc9f1c584a2ae0f7b146cdb81d78
# Parent  f8fa1887edcec2008fb3d6e7bbd58b681c8f6cc5
asm: avx2 code for sub_ps for chroma sizes 16x16, 32x32, reused the code from luma

Performance (columns: speedup over reference, optimized time, reference time)

sse3
[i420]  sub_ps[16x16]  5.27x    719.40          3788.99
[i420]  sub_ps[32x32]  5.39x    2605.93         14054.38

avx2
[i420]  sub_ps[16x16]  7.88x    480.04          3785.06
[i420]  sub_ps[32x32]  10.14x   1386.92         14063.74

diff -r f8fa1887edce -r d5e09e076f09 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Mar 25 10:50:35 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Mar 25 10:51:38 2015 +0530
@@ -1490,6 +1490,8 @@
         p.cu[BLOCK_16x16].sub_ps = x265_pixel_sub_ps_16x16_avx2;
         p.cu[BLOCK_32x32].sub_ps = x265_pixel_sub_ps_32x32_avx2;
         p.cu[BLOCK_64x64].sub_ps = x265_pixel_sub_ps_64x64_avx2;
+        p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].sub_ps = x265_pixel_sub_ps_16x16_avx2;
+        p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sub_ps = x265_pixel_sub_ps_32x32_avx2;
 
         p.pu[LUMA_16x4].pixelavg_pp = x265_pixel_avg_16x4_avx2;
         p.pu[LUMA_16x8].pixelavg_pp = x265_pixel_avg_16x8_avx2;


More information about the x265-devel mailing list