[x265] [PATCH] asm: interp_4tap_horiz_pp sse3
dtyx265 at gmail.com
dtyx265 at gmail.com
Wed Apr 22 05:38:12 CEST 2015
# HG changeset patch
# User David T Yuen <dtyx265 at gmail.com>
# Date 1429673867 25200
# Node ID 829814365241f61737f3f39a400f55cc49702679
# Parent c135c117ffb083a00d4353279ea669e8f3f7a8ee
asm: interp_4tap_horiz_pp sse3
This replaces C code for 6x8, 6x16, 8x2, 8x4, 8x6, 8x8, 8x12, 8x16, 8x32, 8x64, 12x16, 12x32, 16x4, 16x8,
16x12, 16x16, 16x24, 16x32, 16x64, 24x32, 24x64, 32x8, 32x16, 32x24, 32x32, 32x48, 32x64, 48x64, 64x16,
64x32, 64x48, 64x64
The ALL_CHROMA_420_PU, ALL_CHROMA_422_PU and ALL_CHROMA_444_PU macros are used to add the primitives in asm-primitives.cpp
64-bit
./test/TestBench --testbench interp | grep hpp
chroma_hpp[ 8x8] 3.02x 3087.49 9315.21
chroma_hpp[16x16] 3.09x 11813.11 36504.26
chroma_hpp[32x32] 3.45x 46862.27 161615.81
chroma_hpp[ 8x4] 2.94x 1567.50 4614.73
chroma_hpp[ 16x8] 3.10x 5930.00 18377.70
chroma_hpp[ 8x16] 3.02x 6130.00 18520.00
chroma_hpp[32x16] 3.46x 23330.07 80829.76
chroma_hpp[16x32] 3.16x 23572.66 74452.23
chroma_hpp[ 8x6] 2.93x 2339.99 6863.20
chroma_hpp[ 6x8] 2.52x 2812.50 7075.69
chroma_hpp[ 8x2] 2.25x 812.50 1830.00
chroma_hpp[16x12] 3.10x 8875.07 27545.60
chroma_hpp[12x16] 2.80x 9810.20 27476.36
chroma_hpp[ 16x4] 3.05x 2995.20 9144.59
chroma_hpp[32x24] 3.45x 34970.45 120594.13
chroma_hpp[24x32] 3.49x 35116.79 122662.94
chroma_hpp[ 32x8] 3.45x 11699.82 40402.34
chroma_hpp[ 8x32] 3.00x 12210.00 36603.46
chroma_hpp[ 8x16] 3.02x 6130.00 18520.00
chroma_hpp[16x32] 3.09x 23573.10 72827.95
chroma_hpp[32x64] 3.58x 93938.63 335978.50
chroma_hpp[ 8x8] 3.02x 3087.49 9314.74
chroma_hpp[16x16] 3.09x 11815.00 36545.97
chroma_hpp[ 8x32] 3.02x 12212.27 36870.14
chroma_hpp[32x32] 3.45x 46748.56 161259.67
chroma_hpp[16x64] 3.18x 47185.50 150017.53
chroma_hpp[ 8x12] 3.04x 4607.50 14000.63
chroma_hpp[ 6x16] 2.49x 5570.10 13870.01
chroma_hpp[ 8x4] 2.94x 1570.00 4613.64
chroma_hpp[16x24] 3.08x 17690.69 54547.18
chroma_hpp[12x32] 2.80x 19618.33 54833.57
chroma_hpp[ 16x8] 3.10x 5932.57 18377.34
chroma_hpp[32x48] 3.45x 70041.92 241370.78
chroma_hpp[24x64] 3.53x 70596.84 249020.33
chroma_hpp[32x16] 3.44x 23374.66 80340.53
chroma_hpp[ 8x64] 3.00x 24422.17 73313.97
chroma_hpp[ 8x8] 3.01x 3090.00 9314.26
chroma_hpp[16x16] 3.11x 11810.00 36736.14
chroma_hpp[32x32] 3.47x 46771.40 162154.16
chroma_hpp[64x64] 3.25x 195843.97 636910.44
chroma_hpp[ 8x4] 2.94x 1570.00 4613.35
chroma_hpp[ 16x8] 3.10x 5933.42 18381.31
chroma_hpp[ 8x16] 3.02x 6131.43 18520.17
chroma_hpp[32x16] 3.42x 23450.76 80160.37
chroma_hpp[16x32] 3.09x 23619.58 73027.41
chroma_hpp[64x32] 3.42x 92894.85 318107.38
chroma_hpp[32x64] 3.48x 93646.98 325950.78
chroma_hpp[16x12] 3.10x 8874.99 27503.11
chroma_hpp[12x16] 2.83x 9809.99 27769.48
chroma_hpp[ 16x4] 3.05x 2994.99 9138.53
chroma_hpp[32x24] 3.42x 35123.29 120115.27
chroma_hpp[24x32] 3.53x 35143.41 124032.27
chroma_hpp[ 32x8] 3.46x 11692.58 40400.25
chroma_hpp[ 8x32] 3.02x 12212.50 36843.57
chroma_hpp[64x48] 3.36x 140979.36 473912.28
chroma_hpp[48x64] 3.43x 140712.88 482047.69
chroma_hpp[64x16] 3.39x 46530.16 157859.31
chroma_hpp[16x64] 3.08x 47197.85 145477.02
32-bit
./test/TestBench --testbench interp | grep hpp
chroma_hpp[ 8x8] 2.96x 3164.98 9354.15
chroma_hpp[16x16] 3.07x 11885.01 36438.13
chroma_hpp[32x32] 3.48x 46818.91 162929.45
chroma_hpp[ 8x4] 2.86x 1645.00 4703.57
chroma_hpp[ 16x8] 3.06x 6005.10 18378.64
chroma_hpp[ 8x16] 2.97x 6205.00 18429.90
chroma_hpp[32x16] 3.46x 23463.52 81110.52
chroma_hpp[16x32] 3.10x 23700.07 73429.12
chroma_hpp[ 8x6] 2.89x 2404.99 6942.73
chroma_hpp[ 6x8] 2.46x 2905.00 7155.45
chroma_hpp[ 8x2] 2.69x 885.00 2379.96
chroma_hpp[16x12] 3.07x 8945.04 27458.99
chroma_hpp[12x16] 2.81x 9862.55 27753.80
chroma_hpp[ 16x4] 3.01x 3065.00 9231.22
chroma_hpp[32x24] 3.45x 35140.03 121204.09
chroma_hpp[24x32] 3.51x 35262.80 123779.88
chroma_hpp[ 32x8] 3.47x 11765.00 40847.72
chroma_hpp[ 8x32] 2.98x 12285.00 36623.77
chroma_hpp[ 8x16] 2.97x 6205.00 18429.95
chroma_hpp[16x32] 3.08x 23691.43 72971.20
chroma_hpp[32x64] 3.47x 93595.39 324758.03
chroma_hpp[ 8x8] 2.95x 3165.39 9353.01
chroma_hpp[16x16] 3.07x 11885.00 36438.18
chroma_hpp[ 8x32] 2.98x 12285.21 36614.84
chroma_hpp[32x32] 3.48x 46794.59 162647.84
chroma_hpp[16x64] 3.08x 47299.79 145605.62
chroma_hpp[ 8x12] 2.98x 4685.06 13949.95
chroma_hpp[ 6x16] 2.46x 5672.50 13972.76
chroma_hpp[ 8x4] 2.86x 1645.00 4702.53
chroma_hpp[16x24] 3.06x 17765.06 54398.70
chroma_hpp[12x32] 2.79x 19676.93 54843.11
chroma_hpp[ 16x8] 3.06x 6005.12 18377.65
chroma_hpp[32x48] 3.46x 70176.74 243033.73
chroma_hpp[24x64] 3.51x 70367.40 246988.72
chroma_hpp[32x16] 3.47x 23405.43 81235.64
chroma_hpp[ 8x64] 2.97x 24490.71 72757.92
chroma_hpp[ 8x8] 2.95x 3165.00 9352.45
chroma_hpp[16x16] 3.07x 11885.00 36437.35
chroma_hpp[32x32] 3.48x 46781.39 162731.84
chroma_hpp[64x64] 3.28x 193972.66 635870.62
chroma_hpp[ 8x4] 2.86x 1645.00 4702.79
chroma_hpp[ 16x8] 3.06x 6005.00 18377.74
chroma_hpp[ 8x16] 2.97x 6205.04 18430.28
chroma_hpp[32x16] 3.46x 23452.05 81121.86
chroma_hpp[16x32] 3.07x 23695.18 72740.23
chroma_hpp[64x32] 3.42x 92974.16 317723.12
chroma_hpp[32x64] 3.47x 93467.95 324431.16
chroma_hpp[16x12] 3.07x 8945.09 27457.70
chroma_hpp[12x16] 2.79x 9862.54 27477.89
chroma_hpp[ 16x4] 3.01x 3065.02 9231.55
chroma_hpp[32x24] 3.45x 35161.96 121188.20
chroma_hpp[24x32] 3.51x 35275.57 123776.31
chroma_hpp[ 32x8] 3.47x 11765.00 40847.59
chroma_hpp[ 8x32] 2.98x 12285.06 36637.80
chroma_hpp[64x48] 3.41x 139693.42 476274.88
chroma_hpp[48x64] 3.44x 139707.61 480515.22
chroma_hpp[64x16] 3.41x 46575.90 158769.59
chroma_hpp[16x64] 3.08x 47262.82 145408.81
diff -r c135c117ffb0 -r 829814365241 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Apr 21 13:42:36 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp Tue Apr 21 20:37:47 2015 -0700
@@ -1407,18 +1407,9 @@
}
if (cpuMask & X265_CPU_SSE3)
{
- p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].filter_hpp = x265_interp_4tap_horiz_pp_2x4_sse3;
- p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].filter_hpp = x265_interp_4tap_horiz_pp_2x8_sse3;
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_hpp = x265_interp_4tap_horiz_pp_4x2_sse3;
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_sse3;
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].filter_hpp = x265_interp_4tap_horiz_pp_4x8_sse3;
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].filter_hpp = x265_interp_4tap_horiz_pp_4x16_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].filter_hpp = x265_interp_4tap_horiz_pp_2x8_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_hpp = x265_interp_4tap_horiz_pp_2x16_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_hpp = x265_interp_4tap_horiz_pp_4x8_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_hpp = x265_interp_4tap_horiz_pp_4x16_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_hpp = x265_interp_4tap_horiz_pp_4x32_sse3;
+ ALL_CHROMA_420_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
+ ALL_CHROMA_422_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
+ ALL_CHROMA_444_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
}
if (cpuMask & X265_CPU_SSSE3)
{
diff -r c135c117ffb0 -r 829814365241 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Tue Apr 21 13:42:36 2015 -0500
+++ b/source/common/x86/ipfilter8.asm Tue Apr 21 20:37:47 2015 -0700
@@ -594,6 +594,237 @@
mov [dstq + dststrideq], r4w
%endmacro
+%macro FILTER_H4_w6_sse2 0
+ pxor m4, m4
+ movh m0, [srcq - 1]
+ movh m5, [srcq]
+ punpckldq m0, m5
+ movhlps m2, m0
+ punpcklbw m0, m4
+ punpcklbw m2, m4
+ movd m1, [srcq + 1]
+ movd m5, [srcq + 2]
+ punpckldq m1, m5
+ punpcklbw m1, m4
+ pmaddwd m0, m6
+ pmaddwd m1, m6
+ pmaddwd m2, m6
+ packssdw m0, m1
+ packssdw m2, m2
+ pshuflw m1, m0, q2301
+ pshufhw m1, m1, q2301
+ pshuflw m3, m2, q2301
+ paddw m0, m1
+ paddw m2, m3
+ psrld m0, 16
+ psrld m2, 16
+ packssdw m0, m2
+ paddw m0, m7
+ psraw m0, 6
+ packuswb m0, m0
+ movd [dstq], m0
+ pextrw r4d, m0, 2
+ mov [dstq + 4], r4w
+%endmacro
+
+%macro FILH4W8_sse2 1
+ movh m0, [srcq - 1 + %1]
+ movh m5, [srcq + %1]
+ punpckldq m0, m5
+ movhlps m2, m0
+ punpcklbw m0, m4
+ punpcklbw m2, m4
+ movh m1, [srcq + 1 + %1]
+ movh m5, [srcq + 2 + %1]
+ punpckldq m1, m5
+ movhlps m3, m1
+ punpcklbw m1, m4
+ punpcklbw m3, m4
+ pmaddwd m0, m6
+ pmaddwd m1, m6
+ pmaddwd m2, m6
+ pmaddwd m3, m6
+ packssdw m0, m1
+ packssdw m2, m3
+ pshuflw m1, m0, q2301
+ pshufhw m1, m1, q2301
+ pshuflw m3, m2, q2301
+ pshufhw m3, m3, q2301
+ paddw m0, m1
+ paddw m2, m3
+ psrld m0, 16
+ psrld m2, 16
+ packssdw m0, m2
+ paddw m0, m7
+ psraw m0, 6
+ packuswb m0, m0
+ movh [dstq + %1], m0
+%endmacro
+
+%macro FILTER_H4_w8_sse2 0
+ FILH4W8_sse2 0
+%endmacro
+
+%macro FILTER_H4_w12_sse2 0
+ FILH4W8_sse2 0
+ movd m1, [srcq - 1 + 8]
+ movd m3, [srcq + 8]
+ punpckldq m1, m3
+ punpcklbw m1, m4
+ movd m2, [srcq + 1 + 8]
+ movd m3, [srcq + 2 + 8]
+ punpckldq m2, m3
+ punpcklbw m2, m4
+ pmaddwd m1, m6
+ pmaddwd m2, m6
+ packssdw m1, m2
+ pshuflw m2, m1, q2301
+ pshufhw m2, m2, q2301
+ paddw m1, m2
+ psrld m1, 16
+ packssdw m1, m1
+ paddw m1, m7
+ psraw m1, 6
+ packuswb m1, m1
+ movd [dstq + 8], m1
+%endmacro
+
+%macro FILTER_H4_w16_sse2 0
+ FILH4W8_sse2 0
+ FILH4W8_sse2 8
+%endmacro
+
+%macro FILTER_H4_w24_sse2 0
+ FILH4W8_sse2 0
+ FILH4W8_sse2 8
+ FILH4W8_sse2 16
+%endmacro
+
+%macro FILTER_H4_w32_sse2 0
+ FILH4W8_sse2 0
+ FILH4W8_sse2 8
+ FILH4W8_sse2 16
+ FILH4W8_sse2 24
+%endmacro
+
+%macro FILTER_H4_w48_sse2 0
+ FILH4W8_sse2 0
+ FILH4W8_sse2 8
+ FILH4W8_sse2 16
+ FILH4W8_sse2 24
+ FILH4W8_sse2 32
+ FILH4W8_sse2 40
+%endmacro
+
+%macro FILTER_H4_w64_sse2 0
+ FILH4W8_sse2 0
+ FILH4W8_sse2 8
+ FILH4W8_sse2 16
+ FILH4W8_sse2 24
+ FILH4W8_sse2 32
+ FILH4W8_sse2 40
+ FILH4W8_sse2 48
+ FILH4W8_sse2 56
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void interp_4tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
+;-----------------------------------------------------------------------------
+%macro IPFILTER_CHROMA_sse3 2
+INIT_XMM sse3
+cglobal interp_4tap_horiz_pp_%1x%2, 4, 6, 8, src, srcstride, dst, dststride
+ mov r4d, r4m
+ mova m7, [pw_32]
+ pxor m4, m4
+
+%ifdef PIC
+ lea r5, [tabw_ChromaCoeff]
+ movddup m6, [r5 + r4 * 8]
+%else
+ movddup m6, [tabw_ChromaCoeff + r4 * 8]
+%endif
+
+%assign x 1
+%rep %2
+ FILTER_H4_w%1_sse2
+%if x < %2
+ add srcq, srcstrideq
+ add dstq, dststrideq
+%endif
+%assign x x+1
+%endrep
+
+ RET
+
+%endmacro
+
+ IPFILTER_CHROMA_sse3 6, 8
+ IPFILTER_CHROMA_sse3 8, 2
+ IPFILTER_CHROMA_sse3 8, 4
+ IPFILTER_CHROMA_sse3 8, 6
+ IPFILTER_CHROMA_sse3 8, 8
+ IPFILTER_CHROMA_sse3 8, 16
+ IPFILTER_CHROMA_sse3 8, 32
+ IPFILTER_CHROMA_sse3 12, 16
+
+ IPFILTER_CHROMA_sse3 6, 16
+ IPFILTER_CHROMA_sse3 8, 12
+ IPFILTER_CHROMA_sse3 8, 64
+ IPFILTER_CHROMA_sse3 12, 32
+
+;-----------------------------------------------------------------------------
+; void interp_4tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
+;-----------------------------------------------------------------------------
+%macro IPFILTER_CHROMA_W_sse3 2
+INIT_XMM sse3
+cglobal interp_4tap_horiz_pp_%1x%2, 4, 6, 8, src, srcstride, dst, dststride
+ mov r4d, r4m
+ mova m7, [pw_32]
+ pxor m4, m4
+%ifdef PIC
+ lea r5, [tabw_ChromaCoeff]
+ movddup m6, [r5 + r4 * 8]
+%else
+ movddup m6, [tabw_ChromaCoeff + r4 * 8]
+%endif
+
+%assign x 1
+%rep %2
+ FILTER_H4_w%1_sse2
+%if x < %2
+ add srcq, srcstrideq
+ add dstq, dststrideq
+%endif
+%assign x x+1
+%endrep
+
+ RET
+
+%endmacro
+
+ IPFILTER_CHROMA_W_sse3 16, 4
+ IPFILTER_CHROMA_W_sse3 16, 8
+ IPFILTER_CHROMA_W_sse3 16, 12
+ IPFILTER_CHROMA_W_sse3 16, 16
+ IPFILTER_CHROMA_W_sse3 16, 32
+ IPFILTER_CHROMA_W_sse3 32, 8
+ IPFILTER_CHROMA_W_sse3 32, 16
+ IPFILTER_CHROMA_W_sse3 32, 24
+ IPFILTER_CHROMA_W_sse3 24, 32
+ IPFILTER_CHROMA_W_sse3 32, 32
+
+ IPFILTER_CHROMA_W_sse3 16, 24
+ IPFILTER_CHROMA_W_sse3 16, 64
+ IPFILTER_CHROMA_W_sse3 32, 48
+ IPFILTER_CHROMA_W_sse3 24, 64
+ IPFILTER_CHROMA_W_sse3 32, 64
+
+ IPFILTER_CHROMA_W_sse3 64, 64
+ IPFILTER_CHROMA_W_sse3 64, 32
+ IPFILTER_CHROMA_W_sse3 64, 48
+ IPFILTER_CHROMA_W_sse3 48, 64
+ IPFILTER_CHROMA_W_sse3 64, 16
+
;-----------------------------------------------------------------------------
; void interp_4tap_horiz_pp_2x4(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
;-----------------------------------------------------------------------------
diff -r c135c117ffb0 -r 829814365241 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h Tue Apr 21 13:42:36 2015 -0500
+++ b/source/common/x86/ipfilter8.h Tue Apr 21 20:37:47 2015 -0700
@@ -814,6 +814,38 @@
void x265_interp_4tap_horiz_pp_4x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_4tap_horiz_pp_4x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_4tap_horiz_pp_4x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_6x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_6x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x2_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x4_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x6_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x12_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_12x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_12x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x4_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x12_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x24_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_24x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_24x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x24_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_48x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
#undef LUMA_FILTERS
#undef LUMA_SP_FILTERS
#undef LUMA_SS_FILTERS
More information about the x265-devel mailing list