[x265] [PATCH] asm: avx2 code for chroma sub_ps module, reused luma code
sumalatha at multicorewareinc.com
sumalatha at multicorewareinc.com
Thu Apr 16 07:58:35 CEST 2015
# HG changeset patch
# User Sumalatha Polureddy
# Date 1429161378 -19800
# Thu Apr 16 10:46:18 2015 +0530
# Node ID 861bd21904a3cf54c16955b07940b1d16247d365
# Parent 7d3cb1832fed137c6362c3d1e8f29f6be7de113d
asm: avx2 code for chroma sub_ps module, reused luma code
Testbench results (columns appear to be: speedup over C, asm cycles, C cycles):
sse4:
[i422] sub_ps[16x32] 5.50x 1386.46 7627.27
[i422] sub_ps[32x64] 5.28x 5137.07 27110.01
avx2:
[i422] sub_ps[16x32] 9.22x 831.52 7665.70
[i422] sub_ps[32x64] 10.59x 2581.10 27343.41
diff -r 7d3cb1832fed -r 861bd21904a3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Apr 16 10:36:39 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Apr 16 10:46:18 2015 +0530
@@ -1673,6 +1673,8 @@
p.cu[BLOCK_64x64].sub_ps = x265_pixel_sub_ps_64x64_avx2;
p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].sub_ps = x265_pixel_sub_ps_16x16_avx2;
p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sub_ps = x265_pixel_sub_ps_32x32_avx2;
+ p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sub_ps = x265_pixel_sub_ps_16x32_avx2;
+ p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sub_ps = x265_pixel_sub_ps_32x64_avx2;
p.pu[LUMA_16x4].pixelavg_pp = x265_pixel_avg_16x4_avx2;
p.pu[LUMA_16x8].pixelavg_pp = x265_pixel_avg_16x8_avx2;
diff -r 7d3cb1832fed -r 861bd21904a3 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Thu Apr 16 10:36:39 2015 +0530
+++ b/source/common/x86/pixel.h Thu Apr 16 10:46:18 2015 +0530
@@ -263,6 +263,8 @@
void x265_pixel_sub_ps_16x16_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
void x265_pixel_sub_ps_32x32_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
void x265_pixel_sub_ps_64x64_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
+void x265_pixel_sub_ps_16x32_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
+void x265_pixel_sub_ps_32x64_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
int x265_psyCost_pp_4x4_avx2(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride);
int x265_psyCost_pp_8x8_avx2(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride);
More information about the x265-devel mailing list