[x265] [PATCH 4 of 5] asm: call avx code for copy_ss[32x32] and chroma copy_ss[32x32]

Sumalatha Polureddy sumalatha at multicorewareinc.com
Wed Mar 25 06:35:48 CET 2015


# HG changeset patch
# User Sumalatha Polureddy
# Date 1427260999 -19800
#      Wed Mar 25 10:53:19 2015 +0530
# Node ID f2464e65b1afe7ffe2e24b69dc6946a918d5ff5c
# Parent  d5e09e076f09bc9f1c584a2ae0f7b146cdb81d78
asm: call avx code for copy_ss[32x32] and chroma copy_ss[32x32]

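Benchmark results (columns: primitive, speedup, optimized time, reference time;
speedup = reference time / optimized time)

sse3
copy_ss[32x32]         8.83x    1258.84         11120.56
[i420] copy_ss[32x32]  7.86x    1417.72         11147.99

avx
copy_ss[32x32]         16.77x   664.79          11150.48
[i420] copy_ss[32x32]  14.60x   748.45          10926.18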

diff -r d5e09e076f09 -r f2464e65b1af source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Mar 25 10:51:38 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Mar 25 10:53:19 2015 +0530
@@ -1381,8 +1381,10 @@
         p.ssim_end_4 = x265_pixel_ssim_end4_avx;
 
         p.cu[BLOCK_16x16].copy_ss = x265_blockcopy_ss_16x16_avx;
+        p.cu[BLOCK_32x32].copy_ss = x265_blockcopy_ss_32x32_avx;
         p.cu[BLOCK_64x64].copy_ss = x265_blockcopy_ss_64x64_avx;
         p.chroma[X265_CSP_I420].cu[CHROMA_420_16x16].copy_ss = x265_blockcopy_ss_16x16_avx;
+        p.chroma[X265_CSP_I420].cu[CHROMA_420_32x32].copy_ss = x265_blockcopy_ss_32x32_avx;
         p.chroma[X265_CSP_I422].cu[CHROMA_422_16x32].copy_ss = x265_blockcopy_ss_16x32_avx;
 
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].copy_pp = x265_blockcopy_pp_32x8_avx;


More information about the x265-devel mailing list