[x265] [PATCH] asm: interp_4tap_horiz_pp sse3
chen
chenm003 at 163.com
Wed Apr 22 07:14:43 CEST 2015
right now
At 2015-04-22 11:38:12,dtyx265 at gmail.com wrote:
># HG changeset patch
># User David T Yuen <dtyx265 at gmail.com>
># Date 1429673867 25200
># Node ID 829814365241f61737f3f39a400f55cc49702679
># Parent c135c117ffb083a00d4353279ea669e8f3f7a8ee
>asm: interp_4tap_horiz_pp sse3
>
>This replaces C code for 6x8, 6x16, 8x2, 8x4, 8x6, 8x8, 8x12, 8x16, 8x32, 8x64, 12x16, 12x32, 16x4, 16x8, 16x12,
>16x16, 16x24, 16x32, 16x64, 24x32, 24x64, 32x8, 32x16, 32x24, 32x32, 32x48, 32x64, 48x64, 64x16, 64x32,
>64x48, 64x64
>
>Macros are used to add the primitives to asm-primitives.cpp
>
>64-bit
>
>./test/TestBench --testbench interp | grep hpp
>chroma_hpp[ 8x8] 3.02x 3087.49 9315.21
>chroma_hpp[16x16] 3.09x 11813.11 36504.26
>chroma_hpp[32x32] 3.45x 46862.27 161615.81
>chroma_hpp[ 8x4] 2.94x 1567.50 4614.73
>chroma_hpp[ 16x8] 3.10x 5930.00 18377.70
>chroma_hpp[ 8x16] 3.02x 6130.00 18520.00
>chroma_hpp[32x16] 3.46x 23330.07 80829.76
>chroma_hpp[16x32] 3.16x 23572.66 74452.23
>chroma_hpp[ 8x6] 2.93x 2339.99 6863.20
>chroma_hpp[ 6x8] 2.52x 2812.50 7075.69
>chroma_hpp[ 8x2] 2.25x 812.50 1830.00
>chroma_hpp[16x12] 3.10x 8875.07 27545.60
>chroma_hpp[12x16] 2.80x 9810.20 27476.36
>chroma_hpp[ 16x4] 3.05x 2995.20 9144.59
>chroma_hpp[32x24] 3.45x 34970.45 120594.13
>chroma_hpp[24x32] 3.49x 35116.79 122662.94
>chroma_hpp[ 32x8] 3.45x 11699.82 40402.34
>chroma_hpp[ 8x32] 3.00x 12210.00 36603.46
>chroma_hpp[ 8x16] 3.02x 6130.00 18520.00
>chroma_hpp[16x32] 3.09x 23573.10 72827.95
>chroma_hpp[32x64] 3.58x 93938.63 335978.50
>chroma_hpp[ 8x8] 3.02x 3087.49 9314.74
>chroma_hpp[16x16] 3.09x 11815.00 36545.97
>chroma_hpp[ 8x32] 3.02x 12212.27 36870.14
>chroma_hpp[32x32] 3.45x 46748.56 161259.67
>chroma_hpp[16x64] 3.18x 47185.50 150017.53
>chroma_hpp[ 8x12] 3.04x 4607.50 14000.63
>chroma_hpp[ 6x16] 2.49x 5570.10 13870.01
>chroma_hpp[ 8x4] 2.94x 1570.00 4613.64
>chroma_hpp[16x24] 3.08x 17690.69 54547.18
>chroma_hpp[12x32] 2.80x 19618.33 54833.57
>chroma_hpp[ 16x8] 3.10x 5932.57 18377.34
>chroma_hpp[32x48] 3.45x 70041.92 241370.78
>chroma_hpp[24x64] 3.53x 70596.84 249020.33
>chroma_hpp[32x16] 3.44x 23374.66 80340.53
>chroma_hpp[ 8x64] 3.00x 24422.17 73313.97
>chroma_hpp[ 8x8] 3.01x 3090.00 9314.26
>chroma_hpp[16x16] 3.11x 11810.00 36736.14
>chroma_hpp[32x32] 3.47x 46771.40 162154.16
>chroma_hpp[64x64] 3.25x 195843.97 636910.44
>chroma_hpp[ 8x4] 2.94x 1570.00 4613.35
>chroma_hpp[ 16x8] 3.10x 5933.42 18381.31
>chroma_hpp[ 8x16] 3.02x 6131.43 18520.17
>chroma_hpp[32x16] 3.42x 23450.76 80160.37
>chroma_hpp[16x32] 3.09x 23619.58 73027.41
>chroma_hpp[64x32] 3.42x 92894.85 318107.38
>chroma_hpp[32x64] 3.48x 93646.98 325950.78
>chroma_hpp[16x12] 3.10x 8874.99 27503.11
>chroma_hpp[12x16] 2.83x 9809.99 27769.48
>chroma_hpp[ 16x4] 3.05x 2994.99 9138.53
>chroma_hpp[32x24] 3.42x 35123.29 120115.27
>chroma_hpp[24x32] 3.53x 35143.41 124032.27
>chroma_hpp[ 32x8] 3.46x 11692.58 40400.25
>chroma_hpp[ 8x32] 3.02x 12212.50 36843.57
>chroma_hpp[64x48] 3.36x 140979.36 473912.28
>chroma_hpp[48x64] 3.43x 140712.88 482047.69
>chroma_hpp[64x16] 3.39x 46530.16 157859.31
>chroma_hpp[16x64] 3.08x 47197.85 145477.02
>
>32-bit
>
>./test/TestBench --testbench interp | grep hpp
>chroma_hpp[ 8x8] 2.96x 3164.98 9354.15
>chroma_hpp[16x16] 3.07x 11885.01 36438.13
>chroma_hpp[32x32] 3.48x 46818.91 162929.45
>chroma_hpp[ 8x4] 2.86x 1645.00 4703.57
>chroma_hpp[ 16x8] 3.06x 6005.10 18378.64
>chroma_hpp[ 8x16] 2.97x 6205.00 18429.90
>chroma_hpp[32x16] 3.46x 23463.52 81110.52
>chroma_hpp[16x32] 3.10x 23700.07 73429.12
>chroma_hpp[ 8x6] 2.89x 2404.99 6942.73
>chroma_hpp[ 6x8] 2.46x 2905.00 7155.45
>chroma_hpp[ 8x2] 2.69x 885.00 2379.96
>chroma_hpp[16x12] 3.07x 8945.04 27458.99
>chroma_hpp[12x16] 2.81x 9862.55 27753.80
>chroma_hpp[ 16x4] 3.01x 3065.00 9231.22
>chroma_hpp[32x24] 3.45x 35140.03 121204.09
>chroma_hpp[24x32] 3.51x 35262.80 123779.88
>chroma_hpp[ 32x8] 3.47x 11765.00 40847.72
>chroma_hpp[ 8x32] 2.98x 12285.00 36623.77
>chroma_hpp[ 8x16] 2.97x 6205.00 18429.95
>chroma_hpp[16x32] 3.08x 23691.43 72971.20
>chroma_hpp[32x64] 3.47x 93595.39 324758.03
>chroma_hpp[ 8x8] 2.95x 3165.39 9353.01
>chroma_hpp[16x16] 3.07x 11885.00 36438.18
>chroma_hpp[ 8x32] 2.98x 12285.21 36614.84
>chroma_hpp[32x32] 3.48x 46794.59 162647.84
>chroma_hpp[16x64] 3.08x 47299.79 145605.62
>chroma_hpp[ 8x12] 2.98x 4685.06 13949.95
>chroma_hpp[ 6x16] 2.46x 5672.50 13972.76
>chroma_hpp[ 8x4] 2.86x 1645.00 4702.53
>chroma_hpp[16x24] 3.06x 17765.06 54398.70
>chroma_hpp[12x32] 2.79x 19676.93 54843.11
>chroma_hpp[ 16x8] 3.06x 6005.12 18377.65
>chroma_hpp[32x48] 3.46x 70176.74 243033.73
>chroma_hpp[24x64] 3.51x 70367.40 246988.72
>chroma_hpp[32x16] 3.47x 23405.43 81235.64
>chroma_hpp[ 8x64] 2.97x 24490.71 72757.92
>chroma_hpp[ 8x8] 2.95x 3165.00 9352.45
>chroma_hpp[16x16] 3.07x 11885.00 36437.35
>chroma_hpp[32x32] 3.48x 46781.39 162731.84
>chroma_hpp[64x64] 3.28x 193972.66 635870.62
>chroma_hpp[ 8x4] 2.86x 1645.00 4702.79
>chroma_hpp[ 16x8] 3.06x 6005.00 18377.74
>chroma_hpp[ 8x16] 2.97x 6205.04 18430.28
>chroma_hpp[32x16] 3.46x 23452.05 81121.86
>chroma_hpp[16x32] 3.07x 23695.18 72740.23
>chroma_hpp[64x32] 3.42x 92974.16 317723.12
>chroma_hpp[32x64] 3.47x 93467.95 324431.16
>chroma_hpp[16x12] 3.07x 8945.09 27457.70
>chroma_hpp[12x16] 2.79x 9862.54 27477.89
>chroma_hpp[ 16x4] 3.01x 3065.02 9231.55
>chroma_hpp[32x24] 3.45x 35161.96 121188.20
>chroma_hpp[24x32] 3.51x 35275.57 123776.31
>chroma_hpp[ 32x8] 3.47x 11765.00 40847.59
>chroma_hpp[ 8x32] 2.98x 12285.06 36637.80
>chroma_hpp[64x48] 3.41x 139693.42 476274.88
>chroma_hpp[48x64] 3.44x 139707.61 480515.22
>chroma_hpp[64x16] 3.41x 46575.90 158769.59
>chroma_hpp[16x64] 3.08x 47262.82 145408.81
>
>diff -r c135c117ffb0 -r 829814365241 source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp Tue Apr 21 13:42:36 2015 -0500
>+++ b/source/common/x86/asm-primitives.cpp Tue Apr 21 20:37:47 2015 -0700
>@@ -1407,18 +1407,9 @@
> }
> if (cpuMask & X265_CPU_SSE3)
> {
>- p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].filter_hpp = x265_interp_4tap_horiz_pp_2x4_sse3;
>- p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].filter_hpp = x265_interp_4tap_horiz_pp_2x8_sse3;
>- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_hpp = x265_interp_4tap_horiz_pp_4x2_sse3;
>- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_sse3;
>- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].filter_hpp = x265_interp_4tap_horiz_pp_4x8_sse3;
>- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].filter_hpp = x265_interp_4tap_horiz_pp_4x16_sse3;
>- p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].filter_hpp = x265_interp_4tap_horiz_pp_2x8_sse3;
>- p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_hpp = x265_interp_4tap_horiz_pp_2x16_sse3;
>- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_sse3;
>- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_hpp = x265_interp_4tap_horiz_pp_4x8_sse3;
>- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_hpp = x265_interp_4tap_horiz_pp_4x16_sse3;
>- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_hpp = x265_interp_4tap_horiz_pp_4x32_sse3;
>+ ALL_CHROMA_420_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
>+ ALL_CHROMA_422_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
>+ ALL_CHROMA_444_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
> }
> if (cpuMask & X265_CPU_SSSE3)
> {
>diff -r c135c117ffb0 -r 829814365241 source/common/x86/ipfilter8.asm
>--- a/source/common/x86/ipfilter8.asm Tue Apr 21 13:42:36 2015 -0500
>+++ b/source/common/x86/ipfilter8.asm Tue Apr 21 20:37:47 2015 -0700
>@@ -594,6 +594,237 @@
> mov [dstq + dststrideq], r4w
> %endmacro
>
>+%macro FILTER_H4_w6_sse2 0
>+ pxor m4, m4
>+ movh m0, [srcq - 1]
>+ movh m5, [srcq]
>+ punpckldq m0, m5
>+ movhlps m2, m0
>+ punpcklbw m0, m4
>+ punpcklbw m2, m4
>+ movd m1, [srcq + 1]
>+ movd m5, [srcq + 2]
>+ punpckldq m1, m5
>+ punpcklbw m1, m4
>+ pmaddwd m0, m6
>+ pmaddwd m1, m6
>+ pmaddwd m2, m6
>+ packssdw m0, m1
>+ packssdw m2, m2
>+ pshuflw m1, m0, q2301
>+ pshufhw m1, m1, q2301
>+ pshuflw m3, m2, q2301
>+ paddw m0, m1
>+ paddw m2, m3
>+ psrld m0, 16
>+ psrld m2, 16
>+ packssdw m0, m2
>+ paddw m0, m7
>+ psraw m0, 6
>+ packuswb m0, m0
>+ movd [dstq], m0
>+ pextrw r4d, m0, 2
>+ mov [dstq + 4], r4w
>+%endmacro
>+
>+%macro FILH4W8_sse2 1
>+ movh m0, [srcq - 1 + %1]
>+ movh m5, [srcq + %1]
>+ punpckldq m0, m5
>+ movhlps m2, m0
>+ punpcklbw m0, m4
>+ punpcklbw m2, m4
>+ movh m1, [srcq + 1 + %1]
>+ movh m5, [srcq + 2 + %1]
>+ punpckldq m1, m5
>+ movhlps m3, m1
>+ punpcklbw m1, m4
>+ punpcklbw m3, m4
>+ pmaddwd m0, m6
>+ pmaddwd m1, m6
>+ pmaddwd m2, m6
>+ pmaddwd m3, m6
>+ packssdw m0, m1
>+ packssdw m2, m3
>+ pshuflw m1, m0, q2301
>+ pshufhw m1, m1, q2301
>+ pshuflw m3, m2, q2301
>+ pshufhw m3, m3, q2301
>+ paddw m0, m1
>+ paddw m2, m3
>+ psrld m0, 16
>+ psrld m2, 16
>+ packssdw m0, m2
>+ paddw m0, m7
>+ psraw m0, 6
>+ packuswb m0, m0
>+ movh [dstq + %1], m0
>+%endmacro
>+
>+%macro FILTER_H4_w8_sse2 0
>+ FILH4W8_sse2 0
>+%endmacro
>+
>+%macro FILTER_H4_w12_sse2 0
>+ FILH4W8_sse2 0
>+ movd m1, [srcq - 1 + 8]
>+ movd m3, [srcq + 8]
>+ punpckldq m1, m3
>+ punpcklbw m1, m4
>+ movd m2, [srcq + 1 + 8]
>+ movd m3, [srcq + 2 + 8]
>+ punpckldq m2, m3
>+ punpcklbw m2, m4
>+ pmaddwd m1, m6
>+ pmaddwd m2, m6
>+ packssdw m1, m2
>+ pshuflw m2, m1, q2301
>+ pshufhw m2, m2, q2301
>+ paddw m1, m2
>+ psrld m1, 16
>+ packssdw m1, m1
>+ paddw m1, m7
>+ psraw m1, 6
>+ packuswb m1, m1
>+ movd [dstq + 8], m1
>+%endmacro
>+
>+%macro FILTER_H4_w16_sse2 0
>+ FILH4W8_sse2 0
>+ FILH4W8_sse2 8
>+%endmacro
>+
>+%macro FILTER_H4_w24_sse2 0
>+ FILH4W8_sse2 0
>+ FILH4W8_sse2 8
>+ FILH4W8_sse2 16
>+%endmacro
>+
>+%macro FILTER_H4_w32_sse2 0
>+ FILH4W8_sse2 0
>+ FILH4W8_sse2 8
>+ FILH4W8_sse2 16
>+ FILH4W8_sse2 24
>+%endmacro
>+
>+%macro FILTER_H4_w48_sse2 0
>+ FILH4W8_sse2 0
>+ FILH4W8_sse2 8
>+ FILH4W8_sse2 16
>+ FILH4W8_sse2 24
>+ FILH4W8_sse2 32
>+ FILH4W8_sse2 40
>+%endmacro
>+
>+%macro FILTER_H4_w64_sse2 0
>+ FILH4W8_sse2 0
>+ FILH4W8_sse2 8
>+ FILH4W8_sse2 16
>+ FILH4W8_sse2 24
>+ FILH4W8_sse2 32
>+ FILH4W8_sse2 40
>+ FILH4W8_sse2 48
>+ FILH4W8_sse2 56
>+%endmacro
>+
>+;-----------------------------------------------------------------------------
>+; void interp_4tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
>+;-----------------------------------------------------------------------------
>+%macro IPFILTER_CHROMA_sse3 2
>+INIT_XMM sse3
>+cglobal interp_4tap_horiz_pp_%1x%2, 4, 6, 8, src, srcstride, dst, dststride
>+ mov r4d, r4m
>+ mova m7, [pw_32]
>+ pxor m4, m4
>+
>+%ifdef PIC
>+ lea r5, [tabw_ChromaCoeff]
>+ movddup m6, [r5 + r4 * 8]
>+%else
>+ movddup m6, [tabw_ChromaCoeff + r4 * 8]
>+%endif
>+
>+%assign x 1
>+%rep %2
>+ FILTER_H4_w%1_sse2
>+%if x < %2
>+ add srcq, srcstrideq
>+ add dstq, dststrideq
>+%endif
>+%assign x x+1
>+%endrep
>+
>+ RET
>+
>+%endmacro
>+
>+ IPFILTER_CHROMA_sse3 6, 8
>+ IPFILTER_CHROMA_sse3 8, 2
>+ IPFILTER_CHROMA_sse3 8, 4
>+ IPFILTER_CHROMA_sse3 8, 6
>+ IPFILTER_CHROMA_sse3 8, 8
>+ IPFILTER_CHROMA_sse3 8, 16
>+ IPFILTER_CHROMA_sse3 8, 32
>+ IPFILTER_CHROMA_sse3 12, 16
>+
>+ IPFILTER_CHROMA_sse3 6, 16
>+ IPFILTER_CHROMA_sse3 8, 12
>+ IPFILTER_CHROMA_sse3 8, 64
>+ IPFILTER_CHROMA_sse3 12, 32
>+
>+;-----------------------------------------------------------------------------
>+; void interp_4tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
>+;-----------------------------------------------------------------------------
>+%macro IPFILTER_CHROMA_W_sse3 2
>+INIT_XMM sse3
>+cglobal interp_4tap_horiz_pp_%1x%2, 4, 6, 8, src, srcstride, dst, dststride
>+ mov r4d, r4m
>+ mova m7, [pw_32]
>+ pxor m4, m4
>+%ifdef PIC
>+ lea r5, [tabw_ChromaCoeff]
>+ movddup m6, [r5 + r4 * 8]
>+%else
>+ movddup m6, [tabw_ChromaCoeff + r4 * 8]
>+%endif
>+
>+%assign x 1
>+%rep %2
>+ FILTER_H4_w%1_sse2
>+%if x < %2
>+ add srcq, srcstrideq
>+ add dstq, dststrideq
>+%endif
>+%assign x x+1
>+%endrep
>+
>+ RET
>+
>+%endmacro
>+
>+ IPFILTER_CHROMA_W_sse3 16, 4
>+ IPFILTER_CHROMA_W_sse3 16, 8
>+ IPFILTER_CHROMA_W_sse3 16, 12
>+ IPFILTER_CHROMA_W_sse3 16, 16
>+ IPFILTER_CHROMA_W_sse3 16, 32
>+ IPFILTER_CHROMA_W_sse3 32, 8
>+ IPFILTER_CHROMA_W_sse3 32, 16
>+ IPFILTER_CHROMA_W_sse3 32, 24
>+ IPFILTER_CHROMA_W_sse3 24, 32
>+ IPFILTER_CHROMA_W_sse3 32, 32
>+
>+ IPFILTER_CHROMA_W_sse3 16, 24
>+ IPFILTER_CHROMA_W_sse3 16, 64
>+ IPFILTER_CHROMA_W_sse3 32, 48
>+ IPFILTER_CHROMA_W_sse3 24, 64
>+ IPFILTER_CHROMA_W_sse3 32, 64
>+
>+ IPFILTER_CHROMA_W_sse3 64, 64
>+ IPFILTER_CHROMA_W_sse3 64, 32
>+ IPFILTER_CHROMA_W_sse3 64, 48
>+ IPFILTER_CHROMA_W_sse3 48, 64
>+ IPFILTER_CHROMA_W_sse3 64, 16
>+
> ;-----------------------------------------------------------------------------
> ; void interp_4tap_horiz_pp_2x4(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
> ;-----------------------------------------------------------------------------
>diff -r c135c117ffb0 -r 829814365241 source/common/x86/ipfilter8.h
>--- a/source/common/x86/ipfilter8.h Tue Apr 21 13:42:36 2015 -0500
>+++ b/source/common/x86/ipfilter8.h Tue Apr 21 20:37:47 2015 -0700
>@@ -814,6 +814,38 @@
> void x265_interp_4tap_horiz_pp_4x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_4tap_horiz_pp_4x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_4tap_horiz_pp_4x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_6x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_6x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x2_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x4_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x6_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x12_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_12x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_12x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x4_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x12_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x24_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_24x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_24x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x24_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_48x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_64x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_64x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_64x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_64x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> #undef LUMA_FILTERS
> #undef LUMA_SP_FILTERS
> #undef LUMA_SS_FILTERS
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150422/a56ca783/attachment-0001.html>
More information about the x265-devel mailing list