[x265] [PATCH] asm: avx2 code for satd for all chroma i420

Sumalatha Polureddy (sumalatha at multicorewareinc.com)
Fri May 22 10:59:41 CEST 2015


# HG changeset patch
# User Sumalatha Polureddy
# Date 1432285175 -19800
#      Fri May 22 14:29:35 2015 +0530
# Node ID 40890fe416e34b422078f7196f5d3c20d69e8754
# Parent  2d8f8053ef1fd1daf5c175143d373c5ded6f17da
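asm: avx2 code for satd for all chroma i420

The 16- and 32-wide i420 chroma blocks are pointed at the avx2 satd
functions; the 4-, 8-, 12- and 24-wide blocks are pointed at the avx
satd functions.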

Performance (columns: speedup, optimized time, reference time;
speedup = reference / optimized):

[i420] satd[  8x8]  5.18x    479.56          2483.25
[i420] satd[  8x4]  4.05x    310.37          1256.61
[i420] satd[  4x8]  5.20x    305.01          1586.95
[i420] satd[16x16]  9.48x    969.91          9199.45
[i420] satd[ 16x8]  8.91x    552.74          4927.23
[i420] satd[ 8x16]  4.75x    1005.37         4775.97
[i420] satd[ 16x4]  8.08x    305.02          2464.30
[i420] satd[16x12]  9.34x    763.10          7129.44
[i420] satd[ 4x16]  5.07x    590.03          2991.04
[i420] satd[12x16]  4.75x    1848.51         8778.75
[i420] satd[32x32]  9.65x    3857.45         37239.36
[i420] satd[32x16]  9.22x    2012.41         18556.10
[i420] satd[16x32]  10.34x   1844.08         19060.68
[i420] satd[ 32x8]  9.29x    1017.61         9456.87
[i420] satd[32x24]  9.48x    2789.28         26440.75
[i420] satd[ 8x32]  4.83x    1938.89         9369.64
[i420] satd[24x32]  5.19x    5460.95         28344.00

diff -r 2d8f8053ef1f -r 40890fe416e3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri May 22 10:39:30 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri May 22 14:29:35 2015 +0530
@@ -1072,6 +1072,14 @@
         p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd = x265_pixel_satd_4x32_avx;
 
         ALL_LUMA_PU(satd, pixel_satd, avx);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].satd = x265_pixel_satd_8x8_avx;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].satd = x265_pixel_satd_8x4_avx;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].satd = x265_pixel_satd_8x16_avx;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].satd = x265_pixel_satd_8x32_avx;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].satd = x265_pixel_satd_12x16_avx;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].satd = x265_pixel_satd_24x32_avx;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].satd = x265_pixel_satd_4x16_avx;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].satd = x265_pixel_satd_4x8_avx;
         ASSIGN_SA8D(avx);
         LUMA_VAR(avx);
         p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_avx;
@@ -1256,6 +1264,15 @@
         p.pu[LUMA_16x16].satd = x265_pixel_satd_16x16_avx2;
         p.pu[LUMA_16x32].satd = x265_pixel_satd_16x32_avx2;
         p.pu[LUMA_16x64].satd = x265_pixel_satd_16x64_avx2;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].satd = x265_pixel_satd_16x16_avx2;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].satd = x265_pixel_satd_16x8_avx2;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].satd = x265_pixel_satd_16x32_avx2;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].satd = x265_pixel_satd_16x12_avx2;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].satd = x265_pixel_satd_16x4_avx2;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].satd = x265_pixel_satd_32x32_avx2;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].satd = x265_pixel_satd_32x16_avx2;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].satd = x265_pixel_satd_32x24_avx2;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].satd = x265_pixel_satd_32x8_avx2;
 
         p.cu[BLOCK_16x16].ssd_s = x265_pixel_ssd_s_16_avx2;
         p.cu[BLOCK_32x32].ssd_s = x265_pixel_ssd_s_32_avx2;


More information about the x265-devel mailing list