[x265] [PATCH] asm: avx2 code for addavg for chroma i422 for 10 bpp
Sumalatha Polureddy
sumalatha at multicorewareinc.com
Mon May 25 12:20:45 CEST 2015
# HG changeset patch
# User Sumalatha Polureddy
# Date 1432549239 -19800
# Mon May 25 15:50:39 2015 +0530
# Node ID 1404801f5874bb421b57b1e46c84ad888f5250a2
# Parent 9934ac4ab4572583b1c47ea3e07f3e0968c4a903
asm: avx2 code for addavg for chroma i422 for 10 bpp
sse4 (columns: speedup, optimized timing, reference timing):
[i422] addAvg[ 8x16] 8.38x 633.31 5306.14
[i422] addAvg[ 8x8] 8.16x 332.22 2709.76
[i422] addAvg[ 8x4] 7.22x 195.01 1407.87
[i422] addAvg[ 8x12] 8.26x 486.14 4014.58
[i422] addAvg[16x32] 9.65x 2038.33 19677.23
[i422] addAvg[16x16] 9.61x 1044.47 10037.39
[i422] addAvg[ 8x32] 8.39x 1270.14 10661.89
[i422] addAvg[ 16x8] 8.68x 571.52 4962.53
[i422] addAvg[16x24] 9.65x 1538.56 14847.71
[i422] addAvg[32x64] 9.07x 7906.02 71728.40
[i422] addAvg[32x32] 9.53x 3957.30 37717.66
[i422] addAvg[16x64] 9.66x 4059.49 39210.25
[i422] addAvg[ 8x64] 8.50x 2473.50 21018.11
avx2 (columns: speedup, optimized timing, reference timing):
[i422] addAvg[ 8x16] 9.70x 536.21 5198.67
[i422] addAvg[ 8x8] 9.22x 293.60 2707.59
[i422] addAvg[ 8x4] 7.74x 183.32 1418.92
[i422] addAvg[ 8x12] 9.44x 411.13 3882.16
[i422] addAvg[16x32] 15.71x 1265.03 19870.60
[i422] addAvg[16x16] 15.66x 641.44 10046.95
[i422] addAvg[ 8x32] 10.16x 1050.35 10668.03
[i422] addAvg[ 16x8] 15.02x 330.31 4961.08
[i422] addAvg[16x24] 15.62x 953.34 14886.83
[i422] addAvg[32x64] 17.83x 4074.81 72663.34
[i422] addAvg[32x32] 18.34x 2103.27 38567.63
[i422] addAvg[16x64] 15.46x 2492.60 38525.36
[i422] addAvg[ 8x64] 10.30x 2041.04 21032.65
diff -r 9934ac4ab457 -r 1404801f5874 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon May 25 11:47:40 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon May 25 15:50:39 2015 +0530
@@ -1236,6 +1236,20 @@
p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg = x265_addAvg_32x24_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg = x265_addAvg_32x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg = x265_addAvg_8x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg = x265_addAvg_16x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg = x265_addAvg_32x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg = x265_addAvg_8x8_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg = x265_addAvg_16x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg = x265_addAvg_8x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg = x265_addAvg_32x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg = x265_addAvg_16x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg = x265_addAvg_8x12_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg = x265_addAvg_8x4_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg = x265_addAvg_16x24_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg = x265_addAvg_16x8_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg = x265_addAvg_8x64_avx2;
+
p.cu[BLOCK_4x4].psy_cost_ss = x265_psyCost_ss_4x4_avx2;
p.cu[BLOCK_8x8].psy_cost_ss = x265_psyCost_ss_8x8_avx2;
p.cu[BLOCK_16x16].psy_cost_ss = x265_psyCost_ss_16x16_avx2;
More information about the x265-devel mailing list