[x265] [PATCH] asm: avx2 code for addavg for chroma i422 for 10 bpp

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Mon May 25 12:20:45 CEST 2015


# HG changeset patch
# User Sumalatha Polureddy
# Date 1432549239 -19800
#      Mon May 25 15:50:39 2015 +0530
# Node ID 1404801f5874bb421b57b1e46c84ad888f5250a2
# Parent  9934ac4ab4572583b1c47ea3e07f3e0968c4a903
asm: avx2 code for addavg for chroma i422 for 10 bpp

sse4:
 [i422]  addAvg[ 8x16]  8.38x    633.31          5306.14
 [i422]  addAvg[  8x8]  8.16x    332.22          2709.76
 [i422]  addAvg[  8x4]  7.22x    195.01          1407.87
 [i422]  addAvg[ 8x12]  8.26x    486.14          4014.58
 [i422]  addAvg[16x32]  9.65x    2038.33         19677.23
 [i422]  addAvg[16x16]  9.61x    1044.47         10037.39
 [i422]  addAvg[ 8x32]  8.39x    1270.14         10661.89
 [i422]  addAvg[ 16x8]  8.68x    571.52          4962.53
 [i422]  addAvg[16x24]  9.65x    1538.56         14847.71
 [i422]  addAvg[32x64]  9.07x    7906.02         71728.40
 [i422]  addAvg[32x32]  9.53x    3957.30         37717.66
 [i422]  addAvg[16x64]  9.66x    4059.49         39210.25
 [i422]  addAvg[ 8x64]  8.50x    2473.50         21018.11

avx2:
 [i422]  addAvg[ 8x16]  9.70x    536.21          5198.67
 [i422]  addAvg[  8x8]  9.22x    293.60          2707.59
 [i422]  addAvg[  8x4]  7.74x    183.32          1418.92
 [i422]  addAvg[ 8x12]  9.44x    411.13          3882.16
 [i422]  addAvg[16x32]  15.71x   1265.03         19870.60
 [i422]  addAvg[16x16]  15.66x   641.44          10046.95
 [i422]  addAvg[ 8x32]  10.16x   1050.35         10668.03
 [i422]  addAvg[ 16x8]  15.02x   330.31          4961.08
 [i422]  addAvg[16x24]  15.62x   953.34          14886.83
 [i422]  addAvg[32x64]  17.83x   4074.81         72663.34
 [i422]  addAvg[32x32]  18.34x   2103.27         38567.63
 [i422]  addAvg[16x64]  15.46x   2492.60         38525.36
 [i422]  addAvg[ 8x64]  10.30x   2041.04         21032.65

diff -r 9934ac4ab457 -r 1404801f5874 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon May 25 11:47:40 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Mon May 25 15:50:39 2015 +0530
@@ -1236,6 +1236,20 @@
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg = x265_addAvg_32x24_avx2;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg = x265_addAvg_32x32_avx2;
 
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg = x265_addAvg_8x16_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg = x265_addAvg_16x32_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg = x265_addAvg_32x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg = x265_addAvg_8x8_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg = x265_addAvg_16x16_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg = x265_addAvg_8x32_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg = x265_addAvg_32x32_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg = x265_addAvg_16x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg = x265_addAvg_8x12_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg = x265_addAvg_8x4_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg = x265_addAvg_16x24_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg = x265_addAvg_16x8_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg = x265_addAvg_8x64_avx2;
+
         p.cu[BLOCK_4x4].psy_cost_ss = x265_psyCost_ss_4x4_avx2;
         p.cu[BLOCK_8x8].psy_cost_ss = x265_psyCost_ss_8x8_avx2;
         p.cu[BLOCK_16x16].psy_cost_ss = x265_psyCost_ss_16x16_avx2;


More information about the x265-devel mailing list