[x265] [PATCH] asm: interp_4tap_horiz_pp sse3

chen chenm003 at 163.com
Wed Apr 22 07:14:43 CEST 2015


right now


At 2015-04-22 11:38:12,dtyx265 at gmail.com wrote:
># HG changeset patch
># User David T Yuen <dtyx265 at gmail.com>
># Date 1429673867 25200
># Node ID 829814365241f61737f3f39a400f55cc49702679
># Parent  c135c117ffb083a00d4353279ea669e8f3f7a8ee
>asm: interp_4tap_horiz_pp sse3
>
>This replaces C code for 6x8, 6x16, 8x2, 8x4, 8x6, 8x8, 8x12, 8x16, 8x32, 8x64, 12x16, 12x32, 16x4, 16x8, 16x12,
>16x16, 16x24, 16x32, 16x64, 24x32, 24x64, 32x8, 32x16, 32x24, 32x32, 32x48, 32x64, 48x64, 64x16, 64x32,
>64x48, 64x64
>
>Macros are used to add the primitives to asm-primitives.cpp
>
>64-bit
>
>./test/TestBench --testbench interp | grep hpp
>chroma_hpp[  8x8]	3.02x 	 3087.49  	 9315.21
>chroma_hpp[16x16]	3.09x 	 11813.11 	 36504.26
>chroma_hpp[32x32]	3.45x 	 46862.27 	 161615.81
>chroma_hpp[  8x4]	2.94x 	 1567.50  	 4614.73
>chroma_hpp[ 16x8]	3.10x 	 5930.00  	 18377.70
>chroma_hpp[ 8x16]	3.02x 	 6130.00  	 18520.00
>chroma_hpp[32x16]	3.46x 	 23330.07 	 80829.76
>chroma_hpp[16x32]	3.16x 	 23572.66 	 74452.23
>chroma_hpp[  8x6]	2.93x 	 2339.99  	 6863.20
>chroma_hpp[  6x8]	2.52x 	 2812.50  	 7075.69
>chroma_hpp[  8x2]	2.25x 	 812.50   	 1830.00
>chroma_hpp[16x12]	3.10x 	 8875.07  	 27545.60
>chroma_hpp[12x16]	2.80x 	 9810.20  	 27476.36
>chroma_hpp[ 16x4]	3.05x 	 2995.20  	 9144.59
>chroma_hpp[32x24]	3.45x 	 34970.45 	 120594.13
>chroma_hpp[24x32]	3.49x 	 35116.79 	 122662.94
>chroma_hpp[ 32x8]	3.45x 	 11699.82 	 40402.34
>chroma_hpp[ 8x32]	3.00x 	 12210.00 	 36603.46
>chroma_hpp[ 8x16]	3.02x 	 6130.00  	 18520.00
>chroma_hpp[16x32]	3.09x 	 23573.10 	 72827.95
>chroma_hpp[32x64]	3.58x 	 93938.63 	 335978.50
>chroma_hpp[  8x8]	3.02x 	 3087.49  	 9314.74
>chroma_hpp[16x16]	3.09x 	 11815.00 	 36545.97
>chroma_hpp[ 8x32]	3.02x 	 12212.27 	 36870.14
>chroma_hpp[32x32]	3.45x 	 46748.56 	 161259.67
>chroma_hpp[16x64]	3.18x 	 47185.50 	 150017.53
>chroma_hpp[ 8x12]	3.04x 	 4607.50  	 14000.63
>chroma_hpp[ 6x16]	2.49x 	 5570.10  	 13870.01
>chroma_hpp[  8x4]	2.94x 	 1570.00  	 4613.64
>chroma_hpp[16x24]	3.08x 	 17690.69 	 54547.18
>chroma_hpp[12x32]	2.80x 	 19618.33 	 54833.57
>chroma_hpp[ 16x8]	3.10x 	 5932.57  	 18377.34
>chroma_hpp[32x48]	3.45x 	 70041.92 	 241370.78
>chroma_hpp[24x64]	3.53x 	 70596.84 	 249020.33
>chroma_hpp[32x16]	3.44x 	 23374.66 	 80340.53
>chroma_hpp[ 8x64]	3.00x 	 24422.17 	 73313.97
>chroma_hpp[  8x8]	3.01x 	 3090.00  	 9314.26
>chroma_hpp[16x16]	3.11x 	 11810.00 	 36736.14
>chroma_hpp[32x32]	3.47x 	 46771.40 	 162154.16
>chroma_hpp[64x64]	3.25x 	 195843.97 	 636910.44
>chroma_hpp[  8x4]	2.94x 	 1570.00  	 4613.35
>chroma_hpp[ 16x8]	3.10x 	 5933.42  	 18381.31
>chroma_hpp[ 8x16]	3.02x 	 6131.43  	 18520.17
>chroma_hpp[32x16]	3.42x 	 23450.76 	 80160.37
>chroma_hpp[16x32]	3.09x 	 23619.58 	 73027.41
>chroma_hpp[64x32]	3.42x 	 92894.85 	 318107.38
>chroma_hpp[32x64]	3.48x 	 93646.98 	 325950.78
>chroma_hpp[16x12]	3.10x 	 8874.99  	 27503.11
>chroma_hpp[12x16]	2.83x 	 9809.99  	 27769.48
>chroma_hpp[ 16x4]	3.05x 	 2994.99  	 9138.53
>chroma_hpp[32x24]	3.42x 	 35123.29 	 120115.27
>chroma_hpp[24x32]	3.53x 	 35143.41 	 124032.27
>chroma_hpp[ 32x8]	3.46x 	 11692.58 	 40400.25
>chroma_hpp[ 8x32]	3.02x 	 12212.50 	 36843.57
>chroma_hpp[64x48]	3.36x 	 140979.36 	 473912.28
>chroma_hpp[48x64]	3.43x 	 140712.88 	 482047.69
>chroma_hpp[64x16]	3.39x 	 46530.16 	 157859.31
>chroma_hpp[16x64]	3.08x 	 47197.85 	 145477.02
>
>32-bit
>
>./test/TestBench --testbench interp | grep hpp
>chroma_hpp[  8x8]	2.96x 	 3164.98  	 9354.15
>chroma_hpp[16x16]	3.07x 	 11885.01 	 36438.13
>chroma_hpp[32x32]	3.48x 	 46818.91 	 162929.45
>chroma_hpp[  8x4]	2.86x 	 1645.00  	 4703.57
>chroma_hpp[ 16x8]	3.06x 	 6005.10  	 18378.64
>chroma_hpp[ 8x16]	2.97x 	 6205.00  	 18429.90
>chroma_hpp[32x16]	3.46x 	 23463.52 	 81110.52
>chroma_hpp[16x32]	3.10x 	 23700.07 	 73429.12
>chroma_hpp[  8x6]	2.89x 	 2404.99  	 6942.73
>chroma_hpp[  6x8]	2.46x 	 2905.00  	 7155.45
>chroma_hpp[  8x2]	2.69x 	 885.00   	 2379.96
>chroma_hpp[16x12]	3.07x 	 8945.04  	 27458.99
>chroma_hpp[12x16]	2.81x 	 9862.55  	 27753.80
>chroma_hpp[ 16x4]	3.01x 	 3065.00  	 9231.22
>chroma_hpp[32x24]	3.45x 	 35140.03 	 121204.09
>chroma_hpp[24x32]	3.51x 	 35262.80 	 123779.88
>chroma_hpp[ 32x8]	3.47x 	 11765.00 	 40847.72
>chroma_hpp[ 8x32]	2.98x 	 12285.00 	 36623.77
>chroma_hpp[ 8x16]	2.97x 	 6205.00  	 18429.95
>chroma_hpp[16x32]	3.08x 	 23691.43 	 72971.20
>chroma_hpp[32x64]	3.47x 	 93595.39 	 324758.03
>chroma_hpp[  8x8]	2.95x 	 3165.39  	 9353.01
>chroma_hpp[16x16]	3.07x 	 11885.00 	 36438.18
>chroma_hpp[ 8x32]	2.98x 	 12285.21 	 36614.84
>chroma_hpp[32x32]	3.48x 	 46794.59 	 162647.84
>chroma_hpp[16x64]	3.08x 	 47299.79 	 145605.62
>chroma_hpp[ 8x12]	2.98x 	 4685.06  	 13949.95
>chroma_hpp[ 6x16]	2.46x 	 5672.50  	 13972.76
>chroma_hpp[  8x4]	2.86x 	 1645.00  	 4702.53
>chroma_hpp[16x24]	3.06x 	 17765.06 	 54398.70
>chroma_hpp[12x32]	2.79x 	 19676.93 	 54843.11
>chroma_hpp[ 16x8]	3.06x 	 6005.12  	 18377.65
>chroma_hpp[32x48]	3.46x 	 70176.74 	 243033.73
>chroma_hpp[24x64]	3.51x 	 70367.40 	 246988.72
>chroma_hpp[32x16]	3.47x 	 23405.43 	 81235.64
>chroma_hpp[ 8x64]	2.97x 	 24490.71 	 72757.92
>chroma_hpp[  8x8]	2.95x 	 3165.00  	 9352.45
>chroma_hpp[16x16]	3.07x 	 11885.00 	 36437.35
>chroma_hpp[32x32]	3.48x 	 46781.39 	 162731.84
>chroma_hpp[64x64]	3.28x 	 193972.66 	 635870.62
>chroma_hpp[  8x4]	2.86x 	 1645.00  	 4702.79
>chroma_hpp[ 16x8]	3.06x 	 6005.00  	 18377.74
>chroma_hpp[ 8x16]	2.97x 	 6205.04  	 18430.28
>chroma_hpp[32x16]	3.46x 	 23452.05 	 81121.86
>chroma_hpp[16x32]	3.07x 	 23695.18 	 72740.23
>chroma_hpp[64x32]	3.42x 	 92974.16 	 317723.12
>chroma_hpp[32x64]	3.47x 	 93467.95 	 324431.16
>chroma_hpp[16x12]	3.07x 	 8945.09  	 27457.70
>chroma_hpp[12x16]	2.79x 	 9862.54  	 27477.89
>chroma_hpp[ 16x4]	3.01x 	 3065.02  	 9231.55
>chroma_hpp[32x24]	3.45x 	 35161.96 	 121188.20
>chroma_hpp[24x32]	3.51x 	 35275.57 	 123776.31
>chroma_hpp[ 32x8]	3.47x 	 11765.00 	 40847.59
>chroma_hpp[ 8x32]	2.98x 	 12285.06 	 36637.80
>chroma_hpp[64x48]	3.41x 	 139693.42 	 476274.88
>chroma_hpp[48x64]	3.44x 	 139707.61 	 480515.22
>chroma_hpp[64x16]	3.41x 	 46575.90 	 158769.59
>chroma_hpp[16x64]	3.08x 	 47262.82 	 145408.81
>
>diff -r c135c117ffb0 -r 829814365241 source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp	Tue Apr 21 13:42:36 2015 -0500
>+++ b/source/common/x86/asm-primitives.cpp	Tue Apr 21 20:37:47 2015 -0700
>@@ -1407,18 +1407,9 @@
>     }
>     if (cpuMask & X265_CPU_SSE3)
>     {
>-        p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].filter_hpp = x265_interp_4tap_horiz_pp_2x4_sse3;
>-        p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].filter_hpp = x265_interp_4tap_horiz_pp_2x8_sse3;
>-        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_hpp = x265_interp_4tap_horiz_pp_4x2_sse3;
>-        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_sse3;
>-        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].filter_hpp = x265_interp_4tap_horiz_pp_4x8_sse3;
>-        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].filter_hpp = x265_interp_4tap_horiz_pp_4x16_sse3;
>-        p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].filter_hpp = x265_interp_4tap_horiz_pp_2x8_sse3;
>-        p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_hpp = x265_interp_4tap_horiz_pp_2x16_sse3;
>-        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_sse3;
>-        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_hpp = x265_interp_4tap_horiz_pp_4x8_sse3;
>-        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_hpp = x265_interp_4tap_horiz_pp_4x16_sse3;
>-        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_hpp = x265_interp_4tap_horiz_pp_4x32_sse3;
>+        ALL_CHROMA_420_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
>+        ALL_CHROMA_422_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
>+        ALL_CHROMA_444_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
>     }
>     if (cpuMask & X265_CPU_SSSE3)
>     {
>diff -r c135c117ffb0 -r 829814365241 source/common/x86/ipfilter8.asm
>--- a/source/common/x86/ipfilter8.asm	Tue Apr 21 13:42:36 2015 -0500
>+++ b/source/common/x86/ipfilter8.asm	Tue Apr 21 20:37:47 2015 -0700
>@@ -594,6 +594,237 @@
>     mov         [dstq + dststrideq], r4w
> %endmacro
> 
>+%macro FILTER_H4_w6_sse2 0
>+    pxor        m4, m4
>+    movh        m0, [srcq - 1]
>+    movh        m5, [srcq]
>+    punpckldq   m0, m5
>+    movhlps     m2, m0
>+    punpcklbw   m0, m4
>+    punpcklbw   m2, m4
>+    movd        m1, [srcq + 1]
>+    movd        m5, [srcq + 2]
>+    punpckldq   m1, m5
>+    punpcklbw   m1, m4
>+    pmaddwd     m0, m6
>+    pmaddwd     m1, m6
>+    pmaddwd     m2, m6
>+    packssdw    m0, m1
>+    packssdw    m2, m2
>+    pshuflw     m1, m0, q2301
>+    pshufhw     m1, m1, q2301
>+    pshuflw     m3, m2, q2301
>+    paddw       m0, m1
>+    paddw       m2, m3
>+    psrld       m0, 16
>+    psrld       m2, 16
>+    packssdw    m0, m2
>+    paddw       m0, m7
>+    psraw       m0, 6
>+    packuswb    m0, m0
>+    movd        [dstq], m0
>+    pextrw      r4d, m0, 2
>+    mov         [dstq + 4], r4w
>+%endmacro
>+
>+%macro FILH4W8_sse2 1
>+    movh        m0, [srcq - 1 + %1]
>+    movh        m5, [srcq + %1]
>+    punpckldq   m0, m5
>+    movhlps     m2, m0
>+    punpcklbw   m0, m4
>+    punpcklbw   m2, m4
>+    movh        m1, [srcq + 1 + %1]
>+    movh        m5, [srcq + 2 + %1]
>+    punpckldq   m1, m5
>+    movhlps     m3, m1
>+    punpcklbw   m1, m4
>+    punpcklbw   m3, m4
>+    pmaddwd     m0, m6
>+    pmaddwd     m1, m6
>+    pmaddwd     m2, m6
>+    pmaddwd     m3, m6
>+    packssdw    m0, m1
>+    packssdw    m2, m3
>+    pshuflw     m1, m0, q2301
>+    pshufhw     m1, m1, q2301
>+    pshuflw     m3, m2, q2301
>+    pshufhw     m3, m3, q2301
>+    paddw       m0, m1
>+    paddw       m2, m3
>+    psrld       m0, 16
>+    psrld       m2, 16
>+    packssdw    m0, m2
>+    paddw       m0, m7
>+    psraw       m0, 6
>+    packuswb    m0, m0
>+    movh        [dstq + %1], m0
>+%endmacro
>+
>+%macro FILTER_H4_w8_sse2 0
>+    FILH4W8_sse2 0
>+%endmacro
>+
>+%macro FILTER_H4_w12_sse2 0
>+    FILH4W8_sse2 0
>+    movd        m1, [srcq - 1 + 8]
>+    movd        m3, [srcq + 8]
>+    punpckldq   m1, m3
>+    punpcklbw   m1, m4
>+    movd        m2, [srcq + 1 + 8]
>+    movd        m3, [srcq + 2 + 8]
>+    punpckldq   m2, m3
>+    punpcklbw   m2, m4
>+    pmaddwd     m1, m6
>+    pmaddwd     m2, m6
>+    packssdw    m1, m2
>+    pshuflw     m2, m1, q2301
>+    pshufhw     m2, m2, q2301
>+    paddw       m1, m2
>+    psrld       m1, 16
>+    packssdw    m1, m1
>+    paddw       m1, m7
>+    psraw       m1, 6
>+    packuswb    m1, m1
>+    movd        [dstq + 8], m1
>+%endmacro
>+
>+%macro FILTER_H4_w16_sse2 0
>+    FILH4W8_sse2 0
>+    FILH4W8_sse2 8
>+%endmacro
>+
>+%macro FILTER_H4_w24_sse2 0
>+    FILH4W8_sse2 0
>+    FILH4W8_sse2 8
>+    FILH4W8_sse2 16
>+%endmacro
>+
>+%macro FILTER_H4_w32_sse2 0
>+    FILH4W8_sse2 0
>+    FILH4W8_sse2 8
>+    FILH4W8_sse2 16
>+    FILH4W8_sse2 24
>+%endmacro
>+
>+%macro FILTER_H4_w48_sse2 0
>+    FILH4W8_sse2 0
>+    FILH4W8_sse2 8
>+    FILH4W8_sse2 16
>+    FILH4W8_sse2 24
>+    FILH4W8_sse2 32
>+    FILH4W8_sse2 40
>+%endmacro
>+
>+%macro FILTER_H4_w64_sse2 0
>+    FILH4W8_sse2 0
>+    FILH4W8_sse2 8
>+    FILH4W8_sse2 16
>+    FILH4W8_sse2 24
>+    FILH4W8_sse2 32
>+    FILH4W8_sse2 40
>+    FILH4W8_sse2 48
>+    FILH4W8_sse2 56
>+%endmacro
>+
>+;-----------------------------------------------------------------------------
>+; void interp_4tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
>+;-----------------------------------------------------------------------------
>+%macro IPFILTER_CHROMA_sse3 2
>+INIT_XMM sse3
>+cglobal interp_4tap_horiz_pp_%1x%2, 4, 6, 8, src, srcstride, dst, dststride
>+    mov         r4d,        r4m
>+    mova        m7,         [pw_32]
>+    pxor        m4,         m4
>+
>+%ifdef PIC
>+    lea         r5,          [tabw_ChromaCoeff]
>+    movddup     m6,       [r5 + r4 * 8]
>+%else
>+    movddup     m6,       [tabw_ChromaCoeff + r4 * 8]
>+%endif
>+
>+%assign x 1
>+%rep %2
>+    FILTER_H4_w%1_sse2
>+%if x < %2
>+    add         srcq,        srcstrideq
>+    add         dstq,        dststrideq
>+%endif
>+%assign x x+1
>+%endrep
>+
>+    RET
>+
>+%endmacro
>+
>+    IPFILTER_CHROMA_sse3 6,   8
>+    IPFILTER_CHROMA_sse3 8,   2
>+    IPFILTER_CHROMA_sse3 8,   4
>+    IPFILTER_CHROMA_sse3 8,   6
>+    IPFILTER_CHROMA_sse3 8,   8
>+    IPFILTER_CHROMA_sse3 8,  16
>+    IPFILTER_CHROMA_sse3 8,  32
>+    IPFILTER_CHROMA_sse3 12, 16
>+
>+    IPFILTER_CHROMA_sse3 6,  16
>+    IPFILTER_CHROMA_sse3 8,  12
>+    IPFILTER_CHROMA_sse3 8,  64
>+    IPFILTER_CHROMA_sse3 12, 32
>+
>+;-----------------------------------------------------------------------------
>+; void interp_4tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
>+;-----------------------------------------------------------------------------
>+%macro IPFILTER_CHROMA_W_sse3 2
>+INIT_XMM sse3
>+cglobal interp_4tap_horiz_pp_%1x%2, 4, 6, 8, src, srcstride, dst, dststride
>+    mov         r4d,         r4m
>+    mova        m7,         [pw_32]
>+    pxor        m4,         m4
>+%ifdef PIC
>+    lea         r5,          [tabw_ChromaCoeff]
>+    movddup     m6,       [r5 + r4 * 8]
>+%else
>+    movddup     m6,       [tabw_ChromaCoeff + r4 * 8]
>+%endif
>+
>+%assign x 1
>+%rep %2
>+    FILTER_H4_w%1_sse2
>+%if x < %2
>+    add         srcq,        srcstrideq
>+    add         dstq,        dststrideq
>+%endif
>+%assign x x+1
>+%endrep
>+
>+    RET
>+
>+%endmacro
>+
>+    IPFILTER_CHROMA_W_sse3 16,  4
>+    IPFILTER_CHROMA_W_sse3 16,  8
>+    IPFILTER_CHROMA_W_sse3 16, 12
>+    IPFILTER_CHROMA_W_sse3 16, 16
>+    IPFILTER_CHROMA_W_sse3 16, 32
>+    IPFILTER_CHROMA_W_sse3 32,  8
>+    IPFILTER_CHROMA_W_sse3 32, 16
>+    IPFILTER_CHROMA_W_sse3 32, 24
>+    IPFILTER_CHROMA_W_sse3 24, 32
>+    IPFILTER_CHROMA_W_sse3 32, 32
>+
>+    IPFILTER_CHROMA_W_sse3 16, 24
>+    IPFILTER_CHROMA_W_sse3 16, 64
>+    IPFILTER_CHROMA_W_sse3 32, 48
>+    IPFILTER_CHROMA_W_sse3 24, 64
>+    IPFILTER_CHROMA_W_sse3 32, 64
>+
>+    IPFILTER_CHROMA_W_sse3 64, 64
>+    IPFILTER_CHROMA_W_sse3 64, 32
>+    IPFILTER_CHROMA_W_sse3 64, 48
>+    IPFILTER_CHROMA_W_sse3 48, 64
>+    IPFILTER_CHROMA_W_sse3 64, 16
>+
> ;-----------------------------------------------------------------------------
> ; void interp_4tap_horiz_pp_2x4(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
> ;-----------------------------------------------------------------------------
>diff -r c135c117ffb0 -r 829814365241 source/common/x86/ipfilter8.h
>--- a/source/common/x86/ipfilter8.h	Tue Apr 21 13:42:36 2015 -0500
>+++ b/source/common/x86/ipfilter8.h	Tue Apr 21 20:37:47 2015 -0700
>@@ -814,6 +814,38 @@
> void x265_interp_4tap_horiz_pp_4x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_4tap_horiz_pp_4x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_4tap_horiz_pp_4x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_6x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_6x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x2_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x4_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x6_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x12_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_8x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_12x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_12x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x4_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x12_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x24_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_16x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_24x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_24x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x24_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_32x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_48x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_64x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_64x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_64x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_pp_64x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> #undef LUMA_FILTERS
> #undef LUMA_SP_FILTERS
> #undef LUMA_SS_FILTERS
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150422/a56ca783/attachment-0001.html>


More information about the x265-devel mailing list