[x265] [PATCH] asm: avx2 code for chroma sse_pp[16x16, 32x32] for i420

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Thu Jun 4 11:55:54 CEST 2015


# HG changeset patch
# User Sumalatha Polureddy
# Date 1433411747 -19800
#      Thu Jun 04 15:25:47 2015 +0530
# Node ID 62adf0ef875f9c310250bada6f9a699c9f8ff758
# Parent  1ea6ca2517741221f7b851847df8f4799c3e03d0
asm: avx2 code for chroma sse_pp[16x16, 32x32] for i420

Test bench results (primitive, speedup, optimized timing, reference timing):
[i420] sse_pp[16x16]  7.34x    363.66          2668.39
[i420] sse_pp[32x32]  8.76x    1059.07         9281.41

diff -r 1ea6ca251774 -r 62adf0ef875f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Jun 04 12:37:07 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Jun 04 15:25:47 2015 +0530
@@ -1362,6 +1362,8 @@
         p.cu[BLOCK_16x16].sse_pp = x265_pixel_ssd_16x16_avx2;
         p.cu[BLOCK_32x32].sse_pp = x265_pixel_ssd_32x32_avx2;
         p.cu[BLOCK_64x64].sse_pp = x265_pixel_ssd_64x64_avx2;
+        p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].sse_pp = x265_pixel_ssd_16x16_avx2;
+        p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sse_pp = x265_pixel_ssd_32x32_avx2;
 
         p.quant = x265_quant_avx2;
         p.nquant = x265_nquant_avx2;
diff -r 1ea6ca251774 -r 62adf0ef875f source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Thu Jun 04 12:37:07 2015 +0530
+++ b/source/test/pixelharness.cpp	Thu Jun 04 15:25:47 2015 +0530
@@ -1602,6 +1602,14 @@
         }
         if (part < NUM_CU_SIZES)
         {
+            if (opt.chroma[i].cu[part].sse_pp) // compare the optimized chroma sse_pp with the ref primitive
+            {
+                if (!check_pixelcmp(ref.chroma[i].cu[part].sse_pp, opt.chroma[i].cu[part].sse_pp))
+                {
+                    printf("chroma_sse_pp[%s][%s]: failed!\n", x265_source_csp_names[i], chromaPartStr[i][part]);
+                    return false;
+                }
+            }
             if (opt.chroma[i].cu[part].sub_ps)
             {
                 if (!check_pixel_sub_ps(ref.chroma[i].cu[part].sub_ps, opt.chroma[i].cu[part].sub_ps))
@@ -2137,6 +2145,11 @@
                 HEADER("[%s] copy_sp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
                 REPORT_SPEEDUP(opt.chroma[i].cu[part].copy_sp, ref.chroma[i].cu[part].copy_sp, pbuf1, 64, sbuf3, 128);
             }
+            if (opt.chroma[i].cu[part].sse_pp)
+            {
+                HEADER("[%s] sse_pp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
+                REPORT_SPEEDUP(opt.chroma[i].cu[part].sse_pp, ref.chroma[i].cu[part].sse_pp, pbuf1, STRIDE, fref, STRIDE);
+            }
             if (opt.chroma[i].cu[part].sub_ps)
             {
                 HEADER("[%s]  sub_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);


More information about the x265-devel mailing list