[x265] [PATCH] asm: avx code for sad_x3 for 8xN for 8bpp
sumalatha at multicorewareinc.com
sumalatha at multicorewareinc.com
Thu Mar 5 11:50:25 CET 2015
# HG changeset patch
# User Sumalatha Polureddy <sumalatha at multicorewareinc.com>
# Date 1425552614 -19800
# Node ID c33d84fc67e0224c3097dc8dab24aa45efa41ed2
# Parent b12a6a607b55ac1a347ca9c0026d9eabefe5ed80
asm: avx code for sad_x3 for 8xN for 8bpp
sad_x3[ 8x4] 45.16x 277.66 12538.67
sad_x3[ 8x8] 60.13x 411.58 24747.66
sad_x3[ 8x16] 105.51x 580.64 61261.73
sad_x3[ 8x32] 108.69x 1005.73 109317.38
diff -r b12a6a607b55 -r c33d84fc67e0 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Mar 05 15:00:41 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Mar 05 16:20:14 2015 +0530
@@ -1145,7 +1145,7 @@
void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask) // 8bpp
{
- if (cpuMask & X265_CPU_SSE2)
+ if (cpuMask & X265_CPU_SSE2 & 0)
{
/* We do not differentiate CPUs which support MMX and not SSE2. We only check
* for SSE2 and then use both MMX and SSE2 functions */
@@ -1231,7 +1231,7 @@
p.planecopy_sp = x265_downShift_16_sse2;
}
- if (cpuMask & X265_CPU_SSSE3)
+ if (cpuMask & X265_CPU_SSSE3 & 0)
{
p.pu[LUMA_8x16].sad_x3 = x265_pixel_sad_x3_8x16_ssse3;
p.pu[LUMA_8x32].sad_x3 = x265_pixel_sad_x3_8x32_ssse3;
@@ -1291,7 +1291,7 @@
p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
}
- if (cpuMask & X265_CPU_SSE4)
+ if (cpuMask & X265_CPU_SSE4 & 0)
{
p.sign = x265_calSign_sse4;
p.saoCuOrgE0 = x265_saoCuOrgE0_sse4;
@@ -1376,7 +1376,10 @@
ASSIGN_SSE_SS(avx);
LUMA_VAR(avx);
+ p.pu[LUMA_8x4].sad_x3 = x265_pixel_sad_x3_8x4_avx;
p.pu[LUMA_8x8].sad_x3 = x265_pixel_sad_x3_8x8_avx;
+ p.pu[LUMA_8x16].sad_x3 = x265_pixel_sad_x3_8x16_avx;
+ p.pu[LUMA_8x32].sad_x3 = x265_pixel_sad_x3_8x32_avx;
p.pu[LUMA_12x16].sad_x3 = x265_pixel_sad_x3_12x16_avx;
p.pu[LUMA_16x4].sad_x3 = x265_pixel_sad_x3_16x4_avx;
HEVC_SAD_X3(avx);
@@ -1421,7 +1424,7 @@
p.frameInitLowres = x265_frame_init_lowres_core_avx;
}
- if (cpuMask & X265_CPU_XOP)
+ if (cpuMask & X265_CPU_XOP & 0)
{
p.pu[LUMA_4x4].satd = p.cu[BLOCK_4x4].sa8d = x265_pixel_satd_4x4_xop;
ALL_LUMA_PU(satd, pixel_satd, xop);
diff -r b12a6a607b55 -r c33d84fc67e0 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm Thu Mar 05 15:00:41 2015 +0530
+++ b/source/common/x86/sad-a.asm Thu Mar 05 16:20:14 2015 +0530
@@ -3295,6 +3295,10 @@
SAD_X3_W24
SAD_X3_W48
SAD_X3_W64
+SAD_X_SSE2 3, 8, 4, 7
+SAD_X_SSE2 3, 8, 8, 7
+SAD_X_SSE2 3, 8, 16, 7
+SAD_X_SSE2 3, 8, 32, 7
SAD_X_SSE2 3, 16, 64, 7
SAD_X_SSE2 3, 16, 32, 6
SAD_X_SSE2 3, 16, 16, 6
More information about the x265-devel mailing list