[x265] [PATCH] asm: avx2 code for sad_x4[16xN] for 10 bpp

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Tue May 19 09:01:19 CEST 2015


# HG changeset patch
# User Sumalatha Polureddy
# Date 1432018871 -19800
#      Tue May 19 12:31:11 2015 +0530
# Node ID 7423bf9989d3def6f009a2dc813ac245d9789100
# Parent  fd1f061f22290c209560abc5fd02d6401477861a
asm: avx2 code for sad_x4[16xN] for 10 bpp

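sse2 (columns: speedup, then the two timings whose ratio gives that speedup — presumably optimized asm vs. C reference)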
sad_x4[ 16x4]  2.80x    976.64          2730.64
sad_x4[ 16x8]  2.97x    1718.50         5111.16
sad_x4[16x12]  3.04x    2475.38         7525.02
sad_x4[16x16]  3.09x    3122.67         9651.31
sad_x4[16x32]  2.83x    6974.52         19741.04
sad_x4[16x64]  3.07x    12935.32        39669.09

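avx2 (columns: speedup, then the two timings whose ratio gives that speedup — presumably optimized asm vs. C reference)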
sad_x4[ 16x4]  4.93x    518.46          2555.28
sad_x4[ 16x8]  5.91x    852.26          5038.35
sad_x4[16x12]  6.30x    1185.09         7470.80
sad_x4[16x16]  6.27x    1533.31         9617.03
sad_x4[16x32]  5.82x    3501.26         20373.02
sad_x4[16x64]  6.60x    6106.51         40281.86

diff -r fd1f061f2229 -r 7423bf9989d3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue May 19 10:40:00 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue May 19 12:31:11 2015 +0530
@@ -1346,6 +1346,13 @@
         p.pu[LUMA_64x48].sad_x3 = x265_pixel_sad_x3_64x48_avx2;
         p.pu[LUMA_64x64].sad_x3 = x265_pixel_sad_x3_64x64_avx2;
 
+        p.pu[LUMA_16x4].sad_x4 = x265_pixel_sad_x4_16x4_avx2;
+        p.pu[LUMA_16x8].sad_x4 = x265_pixel_sad_x4_16x8_avx2;
+        p.pu[LUMA_16x12].sad_x4 = x265_pixel_sad_x4_16x12_avx2;
+        p.pu[LUMA_16x16].sad_x4 = x265_pixel_sad_x4_16x16_avx2;
+        p.pu[LUMA_16x32].sad_x4 = x265_pixel_sad_x4_16x32_avx2;
+        p.pu[LUMA_16x64].sad_x4 = x265_pixel_sad_x4_16x64_avx2;
+
         p.pu[LUMA_16x4].convert_p2s = x265_filterPixelToShort_16x4_avx2;
         p.pu[LUMA_16x8].convert_p2s = x265_filterPixelToShort_16x8_avx2;
         p.pu[LUMA_16x12].convert_p2s = x265_filterPixelToShort_16x12_avx2;
diff -r fd1f061f2229 -r 7423bf9989d3 source/common/x86/sad16-a.asm
--- a/source/common/x86/sad16-a.asm	Tue May 19 10:40:00 2015 +0530
+++ b/source/common/x86/sad16-a.asm	Tue May 19 12:31:11 2015 +0530
@@ -1502,6 +1502,10 @@
 SAD_X 3, 64, 48
 SAD_X 3, 64, 64
 %define XMM_REGS 9
-SAD_X 4, 16, 16
+SAD_X 4, 16,  4
 SAD_X 4, 16,  8
+SAD_X 4, 16,  12
+SAD_X 4, 16,  16
+SAD_X 4, 16,  32
+SAD_X 4, 16,  64
 


More information about the x265-devel mailing list