[x265] [PATCH] asm: chroma_hpp[i422][8xN, 16xN, 32xN, 6x16, 12x32, 24x64] avx2 code for 16bpp

rajesh at multicorewareinc.com rajesh at multicorewareinc.com
Thu May 28 13:16:25 CEST 2015


# HG changeset patch
# User Rajesh Paulraj <rajesh at multicorewareinc.com>
# Date 1432811286 -19800
#      Thu May 28 16:38:06 2015 +0530
# Node ID ac15f079bd838b1aa874b2787035a7f52e2b2c1e
# Parent  09b0056ca229c87288ef0169ed2d169b706b237b
asm: chroma_hpp[i422][8xN, 16xN, 32xN, 6x16, 12x32, 24x64] avx2 code for 16bpp

diff -r 09b0056ca229 -r ac15f079bd83 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu May 28 15:30:24 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu May 28 16:38:06 2015 +0530
@@ -1545,6 +1545,24 @@
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_hpp = x265_interp_4tap_horiz_pp_32x32_avx2;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].filter_hpp = x265_interp_4tap_horiz_pp_12x16_avx2;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].filter_hpp = x265_interp_4tap_horiz_pp_24x32_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].filter_hpp = x265_interp_4tap_horiz_pp_6x16_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].filter_hpp = x265_interp_4tap_horiz_pp_8x4_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].filter_hpp = x265_interp_4tap_horiz_pp_8x8_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].filter_hpp = x265_interp_4tap_horiz_pp_8x12_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].filter_hpp = x265_interp_4tap_horiz_pp_8x16_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].filter_hpp = x265_interp_4tap_horiz_pp_8x32_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].filter_hpp = x265_interp_4tap_horiz_pp_8x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_hpp = x265_interp_4tap_horiz_pp_16x8_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_hpp = x265_interp_4tap_horiz_pp_16x16_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_hpp = x265_interp_4tap_horiz_pp_16x24_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_hpp = x265_interp_4tap_horiz_pp_16x32_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_hpp = x265_interp_4tap_horiz_pp_16x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_hpp = x265_interp_4tap_horiz_pp_32x16_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_hpp = x265_interp_4tap_horiz_pp_32x32_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_hpp = x265_interp_4tap_horiz_pp_32x48_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_hpp = x265_interp_4tap_horiz_pp_32x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_hpp = x265_interp_4tap_horiz_pp_12x32_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_hpp = x265_interp_4tap_horiz_pp_24x64_avx2;
         p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_hpp = x265_interp_4tap_horiz_pp_64x16_avx2;
         p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_hpp = x265_interp_4tap_horiz_pp_64x32_avx2;
         p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_hpp = x265_interp_4tap_horiz_pp_64x48_avx2;
diff -r 09b0056ca229 -r ac15f079bd83 source/common/x86/ipfilter16.asm
--- a/source/common/x86/ipfilter16.asm	Thu May 28 15:30:24 2015 +0530
+++ b/source/common/x86/ipfilter16.asm	Thu May 28 16:38:06 2015 +0530
@@ -2585,8 +2585,10 @@
 %endmacro
 IPFILTER_CHROMA_avx2_8xN 6
 IPFILTER_CHROMA_avx2_8xN 8
+IPFILTER_CHROMA_avx2_8xN 12
 IPFILTER_CHROMA_avx2_8xN 16
 IPFILTER_CHROMA_avx2_8xN 32
+IPFILTER_CHROMA_avx2_8xN 64
 
 ;-------------------------------------------------------------------------------------------------------------
 ; void interp_4tap_horiz_pp(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx
@@ -2659,7 +2661,9 @@
 IPFILTER_CHROMA_avx2_16xN 8
 IPFILTER_CHROMA_avx2_16xN 12
 IPFILTER_CHROMA_avx2_16xN 16
+IPFILTER_CHROMA_avx2_16xN 24
 IPFILTER_CHROMA_avx2_16xN 32
+IPFILTER_CHROMA_avx2_16xN 64
 
 ;-------------------------------------------------------------------------------------------------------------
 ; void interp_4tap_horiz_pp(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx
@@ -2734,6 +2738,8 @@
 IPFILTER_CHROMA_avx2_32xN 16
 IPFILTER_CHROMA_avx2_32xN 24
 IPFILTER_CHROMA_avx2_32xN 32
+IPFILTER_CHROMA_avx2_32xN 48
+IPFILTER_CHROMA_avx2_32xN 64
 
 ;-------------------------------------------------------------------------------------------------------------
 ; void interp_4tap_horiz_pp(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx
diff -r 09b0056ca229 -r ac15f079bd83 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h	Thu May 28 15:30:24 2015 +0530
+++ b/source/common/x86/ipfilter8.h	Thu May 28 16:38:06 2015 +0530
@@ -398,6 +398,7 @@
 CHROMA_422_P2S_FILTERS_SSE4(_sse4);
 CHROMA_422_P2S_FILTERS_SSSE3(_ssse3);
 CHROMA_422_P2S_FILTERS_AVX2(_avx2);
+CHROMA_422_HORIZ_FILTERS(_avx2);
 
 CHROMA_444_VERT_FILTERS(_sse2);
 CHROMA_444_HORIZ_FILTERS(_sse4);


More information about the x265-devel mailing list