[x265] [PATCH] asm: avx2 code for chroma sub_ps module, reused luma code

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Thu Apr 16 07:58:35 CEST 2015


# HG changeset patch
# User Sumalatha Polureddy
# Date 1429161378 -19800
#      Thu Apr 16 10:46:18 2015 +0530
# Node ID 861bd21904a3cf54c16955b07940b1d16247d365
# Parent  7d3cb1832fed137c6362c3d1e8f29f6be7de113d
asm: avx2 code for chroma sub_ps module, reused luma code

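Test bench results (columns: primitive, speedup = reference / optimized, optimized timing, C reference timing):

sse4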
[i422]  sub_ps[16x32]  5.50x    1386.46         7627.27
[i422]  sub_ps[32x64]  5.28x    5137.07         27110.01

avx2
[i422]  sub_ps[16x32]  9.22x    831.52          7665.70
[i422]  sub_ps[32x64]  10.59x   2581.10         27343.41

diff -r 7d3cb1832fed -r 861bd21904a3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Apr 16 10:36:39 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Apr 16 10:46:18 2015 +0530
@@ -1673,6 +1673,8 @@
         p.cu[BLOCK_64x64].sub_ps = x265_pixel_sub_ps_64x64_avx2;
         p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].sub_ps = x265_pixel_sub_ps_16x16_avx2;
         p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sub_ps = x265_pixel_sub_ps_32x32_avx2;
+        p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sub_ps = x265_pixel_sub_ps_16x32_avx2;
+        p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sub_ps = x265_pixel_sub_ps_32x64_avx2;
 
         p.pu[LUMA_16x4].pixelavg_pp = x265_pixel_avg_16x4_avx2;
         p.pu[LUMA_16x8].pixelavg_pp = x265_pixel_avg_16x8_avx2;
diff -r 7d3cb1832fed -r 861bd21904a3 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Thu Apr 16 10:36:39 2015 +0530
+++ b/source/common/x86/pixel.h	Thu Apr 16 10:46:18 2015 +0530
@@ -263,6 +263,8 @@
 void x265_pixel_sub_ps_16x16_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
 void x265_pixel_sub_ps_32x32_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
 void x265_pixel_sub_ps_64x64_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
+void x265_pixel_sub_ps_16x32_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
+void x265_pixel_sub_ps_32x64_avx2(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1);
 
 int x265_psyCost_pp_4x4_avx2(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride);
 int x265_psyCost_pp_8x8_avx2(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride);


More information about the x265-devel mailing list