[x265] [PATCH] asm: avx code for sad_x3 for 8xN for 8bpp

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Thu Mar 5 11:50:25 CET 2015


# HG changeset patch
# User Sumalatha Polureddy<sumalatha at multicorewareinc.com>
# Date 1425552614 -19800
# Node ID c33d84fc67e0224c3097dc8dab24aa45efa41ed2
# Parent  b12a6a607b55ac1a347ca9c0026d9eabefe5ed80
asm: avx code for sad_x3 for 8xN for 8bpp

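Benchmark results (columns: speedup = third column / second column; presumably optimized AVX cost, then C reference cost):
sad_x3[  8x4]  45.16x   277.66          12538.67
sad_x3[  8x8]  60.13x   411.58          24747.66
sad_x3[ 8x16]  105.51x  580.64          61261.73
sad_x3[ 8x32]  108.69x  1005.73         109317.38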

diff -r b12a6a607b55 -r c33d84fc67e0 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Mar 05 15:00:41 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Mar 05 16:20:14 2015 +0530
@@ -1145,7 +1145,7 @@
 
 void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask) // 8bpp
 {
-    if (cpuMask & X265_CPU_SSE2)
+    if (cpuMask & X265_CPU_SSE2 & 0)
     {
         /* We do not differentiate CPUs which support MMX and not SSE2. We only check
          * for SSE2 and then use both MMX and SSE2 functions */
@@ -1231,7 +1231,7 @@
 
         p.planecopy_sp = x265_downShift_16_sse2;
     }
-    if (cpuMask & X265_CPU_SSSE3)
+    if (cpuMask & X265_CPU_SSSE3 & 0)
     {
         p.pu[LUMA_8x16].sad_x3 = x265_pixel_sad_x3_8x16_ssse3;
         p.pu[LUMA_8x32].sad_x3 = x265_pixel_sad_x3_8x32_ssse3;
@@ -1291,7 +1291,7 @@
         p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
         p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
     }
-    if (cpuMask & X265_CPU_SSE4)
+    if (cpuMask & X265_CPU_SSE4 & 0)
     {
         p.sign = x265_calSign_sse4;
         p.saoCuOrgE0 = x265_saoCuOrgE0_sse4;
@@ -1376,7 +1376,10 @@
         ASSIGN_SSE_SS(avx);
         LUMA_VAR(avx);
 
+        p.pu[LUMA_8x4].sad_x3 = x265_pixel_sad_x3_8x4_avx;
         p.pu[LUMA_8x8].sad_x3 = x265_pixel_sad_x3_8x8_avx;
+        p.pu[LUMA_8x16].sad_x3 = x265_pixel_sad_x3_8x16_avx;
+        p.pu[LUMA_8x32].sad_x3 = x265_pixel_sad_x3_8x32_avx;
         p.pu[LUMA_12x16].sad_x3 = x265_pixel_sad_x3_12x16_avx;
         p.pu[LUMA_16x4].sad_x3  = x265_pixel_sad_x3_16x4_avx;
         HEVC_SAD_X3(avx);
@@ -1421,7 +1424,7 @@
 
         p.frameInitLowres = x265_frame_init_lowres_core_avx;
     }
-    if (cpuMask & X265_CPU_XOP)
+    if (cpuMask & X265_CPU_XOP & 0)
     {
         p.pu[LUMA_4x4].satd = p.cu[BLOCK_4x4].sa8d = x265_pixel_satd_4x4_xop;
         ALL_LUMA_PU(satd, pixel_satd, xop);
diff -r b12a6a607b55 -r c33d84fc67e0 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm	Thu Mar 05 15:00:41 2015 +0530
+++ b/source/common/x86/sad-a.asm	Thu Mar 05 16:20:14 2015 +0530
@@ -3295,6 +3295,10 @@
 SAD_X3_W24
 SAD_X3_W48
 SAD_X3_W64
+SAD_X_SSE2 3, 8, 4, 7
+SAD_X_SSE2 3, 8, 8, 7
+SAD_X_SSE2 3, 8, 16, 7
+SAD_X_SSE2 3, 8, 32, 7
 SAD_X_SSE2 3, 16, 64, 7
 SAD_X_SSE2 3, 16, 32, 6
 SAD_X_SSE2 3, 16, 16, 6


More information about the x265-devel mailing list