[x265] [PATCH] asm: interp_4tap_horiz_ps sse3
chen
chenm003 at 163.com
Fri May 22 04:28:48 CEST 2015
right now
At 2015-05-22 10:25:07,dtyx265 at gmail.com wrote:
># HG changeset patch
># User David T Yuen <dtyx265 at gmail.com>
># Date 1432261447 25200
># Node ID 4330ef5ddfcb64b1a621149fca0a4550c2a2f36f
># Parent 234bc93bd51698801fad77cc861177ed019f5113
>asm: interp_4tap_horiz_ps sse3
>
>This replaces the C code for all interp_4tap_horiz_ps block sizes with SSE3 assembly.
>
>64-bit
>
>./test/TestBench --testbench interp | grep chroma_hps
>chroma_hps[ 4x4] 1.79x 1572.53 2815.09
>chroma_hps[ 8x8] 1.79x 4885.74 8750.62
>chroma_hps[16x16] 1.78x 15859.17 28211.22
>chroma_hps[32x32] 4.36x 57531.00 250840.08
>chroma_hps[ 4x2] 1.77x 1148.16 2035.62
>chroma_hps[ 2x4] 1.67x 950.38 1585.96
>chroma_hps[ 8x4] 1.86x 2972.69 5517.80
>chroma_hps[ 4x8] 1.68x 2695.82 4522.98
>chroma_hps[ 16x8] 1.76x 9335.05 16452.60
>chroma_hps[ 8x16] 1.82x 8200.17 14912.77
>chroma_hps[32x16] 4.34x 31368.87 136237.14
>chroma_hps[16x32] 1.79x 29065.31 52099.11
>chroma_hps[ 8x6] 1.85x 3842.61 7092.53
>chroma_hps[ 6x8] 1.77x 3760.28 6662.62
>chroma_hps[ 8x2] 1.85x 2145.25 3978.48
>chroma_hps[ 2x8] 1.47x 1730.01 2545.01
>chroma_hps[16x12] 1.77x 12595.11 22332.64
>chroma_hps[12x16] 1.77x 12095.12 21372.96
>chroma_hps[ 16x4] 1.80x 5800.24 10418.40
>chroma_hps[ 4x16] 1.74x 4400.00 7643.22
>chroma_hps[32x24] 4.37x 44308.78 193781.08
>chroma_hps[24x32] 3.77x 43365.97 163699.78
>chroma_hps[ 32x8] 4.33x 18220.22 78915.17
>chroma_hps[ 8x32] 1.84x 14875.27 27300.11
>chroma_hps[ 4x8] 1.68x 2700.06 4527.49
>chroma_hps[ 8x16] 1.82x 8200.08 14910.00
>chroma_hps[16x32] 1.79x 29000.16 51820.20
>chroma_hps[32x64] 4.38x 109560.25 479846.81
>chroma_hps[ 4x4] 1.79x 1572.82 2815.21
>chroma_hps[ 2x8] 1.47x 1730.10 2549.97
>chroma_hps[ 8x8] 1.79x 4890.03 8749.99
>chroma_hps[ 4x16] 1.74x 4399.99 7647.49
>chroma_hps[16x16] 1.78x 15855.35 28211.13
>chroma_hps[ 8x32] 1.84x 14865.23 27300.26
>chroma_hps[32x32] 4.37x 57474.61 251064.52
>chroma_hps[16x64] 1.80x 55211.66 99111.59
>chroma_hps[ 8x12] 1.81x 6550.03 11827.51
>chroma_hps[ 6x16] 1.81x 6240.21 11299.97
>chroma_hps[ 8x4] 1.86x 2972.75 5515.43
>chroma_hps[ 2x16] 1.59x 2661.93 4229.99
>chroma_hps[16x24] 1.79x 22375.11 39970.46
>chroma_hps[12x32] 1.78x 22055.13 39222.56
>chroma_hps[ 16x8] 1.76x 9334.98 16452.48
>chroma_hps[ 4x32] 1.76x 7910.12 13933.02
>chroma_hps[32x48] 4.38x 83471.63 365354.94
>chroma_hps[24x64] 3.79x 82642.20 312817.06
>chroma_hps[32x16] 4.37x 31278.96 136734.84
>chroma_hps[ 8x64] 1.86x 28032.05 52264.57
>chroma_hps[ 4x4] 1.79x 1572.48 2810.34
>chroma_hps[ 8x8] 1.79x 4890.07 8752.48
>chroma_hps[16x16] 1.78x 15856.51 28212.94
>chroma_hps[32x32] 4.36x 57629.55 251098.31
>chroma_hps[64x64] 4.48x 218639.95 980187.25
>chroma_hps[ 8x4] 1.86x 2972.61 5514.96
>chroma_hps[ 4x8] 1.68x 2694.99 4527.69
>chroma_hps[ 16x8] 1.76x 9330.05 16449.97
>chroma_hps[ 8x16] 1.82x 8195.16 14910.27
>chroma_hps[32x16] 4.37x 31269.81 136657.97
>chroma_hps[16x32] 1.80x 28982.21 52138.08
>chroma_hps[64x32] 4.43x 114447.45 506502.94
>chroma_hps[32x64] 4.38x 109630.23 479973.81
>chroma_hps[16x12] 1.77x 12590.04 22332.41
>chroma_hps[12x16] 1.77x 12090.26 21373.04
>chroma_hps[ 16x4] 1.80x 5795.19 10418.04
>chroma_hps[ 4x16] 1.74x 4395.00 7642.60
>chroma_hps[32x24] 4.38x 44240.22 193966.91
>chroma_hps[24x32] 3.77x 43525.50 163990.25
>chroma_hps[ 32x8] 4.35x 18219.50 79262.66
>chroma_hps[ 8x32] 1.85x 14875.31 27582.50
>chroma_hps[64x48] 4.51x 166509.48 750447.31
>chroma_hps[48x64] 4.37x 164116.52 717985.25
>chroma_hps[64x16] 4.42x 62320.70 275730.50
>chroma_hps[16x64] 1.79x 55302.99 99178.89
>
>32-bit
>
>./test/TestBench --testbench interp | grep chroma_hps
>chroma_hps[ 4x4] 1.84x 1635.25 3002.56
>chroma_hps[ 8x8] 1.81x 4950.23 8950.84
>chroma_hps[16x16] 1.82x 15833.09 28756.21
>chroma_hps[32x32] 2.85x 58060.22 165384.47
>chroma_hps[ 4x2] 1.84x 1210.67 2223.03
>chroma_hps[ 2x4] 1.64x 1060.56 1734.07
>chroma_hps[ 8x4] 1.89x 3030.20 5722.98
>chroma_hps[ 4x8] 1.69x 2769.99 4680.27
>chroma_hps[ 16x8] 1.78x 9352.70 16650.36
>chroma_hps[ 8x16] 1.84x 8260.39 15190.88
>chroma_hps[32x16] 2.88x 31450.75 90464.86
>chroma_hps[16x32] 1.81x 28938.98 52250.69
>chroma_hps[ 8x6] 1.87x 3912.69 7312.47
>chroma_hps[ 6x8] 1.73x 3837.58 6620.48
>chroma_hps[ 8x2] 1.88x 2210.28 4163.42
>chroma_hps[ 2x8] 1.51x 1785.43 2690.27
>chroma_hps[16x12] 1.79x 12595.43 22570.54
>chroma_hps[12x16] 1.81x 12033.56 21830.21
>chroma_hps[ 16x4] 1.82x 5835.04 10620.58
>chroma_hps[ 4x16] 1.74x 4471.59 7800.57
>chroma_hps[32x24] 2.87x 44425.31 127532.01
>chroma_hps[24x32] 2.91x 43241.68 125888.24
>chroma_hps[ 32x8] 2.85x 18332.81 52325.82
>chroma_hps[ 8x32] 1.86x 14912.98 27770.14
>chroma_hps[ 4x8] 1.69x 2772.78 4680.42
>chroma_hps[ 8x16] 1.84x 8260.74 15192.68
>chroma_hps[16x32] 1.82x 29000.77 52737.20
>chroma_hps[32x64] 2.87x 110187.34 315945.28
>chroma_hps[ 4x4] 1.83x 1640.56 3003.12
>chroma_hps[ 2x8] 1.51x 1785.26 2689.93
>chroma_hps[ 8x8] 1.81x 4950.69 8949.97
>chroma_hps[ 4x16] 1.74x 4470.17 7799.97
>chroma_hps[16x16] 1.80x 15835.98 28490.66
>chroma_hps[ 8x32] 1.86x 14912.46 27772.34
>chroma_hps[32x32] 2.87x 57640.21 165431.17
>chroma_hps[16x64] 1.81x 55077.57 99806.91
>chroma_hps[ 8x12] 1.83x 6610.44 12070.25
>chroma_hps[ 6x16] 1.77x 6318.38 11179.97
>chroma_hps[ 8x4] 1.89x 3030.13 5720.32
>chroma_hps[ 2x16] 1.60x 2729.99 4370.72
>chroma_hps[16x24] 1.81x 22377.56 40517.34
>chroma_hps[12x32] 1.83x 21904.77 40024.01
>chroma_hps[ 16x8] 1.78x 9353.93 16651.38
>chroma_hps[ 4x32] 1.77x 7972.56 14099.94
>chroma_hps[32x48] 2.93x 83770.07 245542.20
>chroma_hps[24x64] 3.01x 82919.81 249421.81
>chroma_hps[32x16] 2.89x 31399.82 90718.93
>chroma_hps[ 8x64] 1.89x 28091.46 53014.97
>chroma_hps[ 4x4] 1.84x 1635.45 3003.62
>chroma_hps[ 8x8] 1.81x 4953.01 8950.02
>chroma_hps[16x16] 1.82x 15833.00 28808.78
>chroma_hps[32x32] 2.87x 57625.32 165363.50
>chroma_hps[64x64] 2.84x 220439.47 625157.88
>chroma_hps[ 8x4] 1.89x 3030.37 5722.53
>chroma_hps[ 4x8] 1.69x 2770.17 4681.64
>chroma_hps[ 16x8] 1.78x 9355.00 16651.06
>chroma_hps[ 8x16] 1.84x 8260.25 15190.74
>chroma_hps[32x16] 2.86x 31415.09 89991.48
>chroma_hps[16x32] 1.83x 28951.90 52869.09
>chroma_hps[64x32] 2.84x 114485.48 324925.31
>chroma_hps[32x64] 2.84x 111272.12 316104.81
>chroma_hps[16x12] 1.79x 12594.20 22572.86
>chroma_hps[12x16] 1.81x 12033.10 21829.97
>chroma_hps[ 16x4] 1.82x 5831.06 10621.60
>chroma_hps[ 4x16] 1.74x 4472.70 7800.57
>chroma_hps[32x24] 2.87x 44476.68 127812.44
>chroma_hps[24x32] 2.91x 43258.67 126067.69
>chroma_hps[ 32x8] 2.84x 18384.90 52140.39
>chroma_hps[ 8x32] 1.86x 14912.74 27770.40
>chroma_hps[64x48] 2.83x 168026.05 474962.94
>chroma_hps[48x64] 2.85x 165469.59 471339.94
>chroma_hps[64x16] 2.84x 62722.09 177881.62
>chroma_hps[16x64] 1.81x 55289.16 100343.34
>
>diff -r 234bc93bd516 -r 4330ef5ddfcb source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp Thu May 21 16:34:48 2015 +0530
>+++ b/source/common/x86/asm-primitives.cpp Thu May 21 19:24:07 2015 -0700
>@@ -1614,6 +1614,9 @@
> ALL_CHROMA_420_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
> ALL_CHROMA_422_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
> ALL_CHROMA_444_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
>+ ALL_CHROMA_420_PU(filter_hps, interp_4tap_horiz_ps, sse3);
>+ ALL_CHROMA_422_PU(filter_hps, interp_4tap_horiz_ps, sse3);
>+ ALL_CHROMA_444_PU(filter_hps, interp_4tap_horiz_ps, sse3);
> }
> if (cpuMask & X265_CPU_SSSE3)
> {
>diff -r 234bc93bd516 -r 4330ef5ddfcb source/common/x86/ipfilter8.asm
>--- a/source/common/x86/ipfilter8.asm Thu May 21 16:34:48 2015 +0530
>+++ b/source/common/x86/ipfilter8.asm Thu May 21 19:24:07 2015 -0700
>@@ -641,6 +641,123 @@
> IPFILTER_CHROMA_sse3 48, 64
> IPFILTER_CHROMA_sse3 64, 16
>
>+%macro FILTER_2 2
>+ movd m3, [srcq + %1]
>+ movd m4, [srcq + 1 + %1]
>+ punpckldq m3, m4
>+ punpcklbw m3, m0
>+ pmaddwd m3, m1
>+ packssdw m3, m3
>+ pshuflw m4, m3, q2301
>+ paddw m3, m4
>+ psrldq m3, 2
>+ psubw m3, m2
>+ movd [dstq + %2], m3
>+%endmacro
>+
>+%macro FILTER_4 2
>+ movd m3, [srcq + %1]
>+ movd m4, [srcq + 1 + %1]
>+ punpckldq m3, m4
>+ punpcklbw m3, m0
>+ pmaddwd m3, m1
>+ movd m4, [srcq + 2 + %1]
>+ movd m5, [srcq + 3 + %1]
>+ punpckldq m4, m5
>+ punpcklbw m4, m0
>+ pmaddwd m4, m1
>+ packssdw m3, m4
>+ pshuflw m4, m3, q2301
>+ pshufhw m4, m4, q2301
>+ paddw m3, m4
>+ psrldq m3, 2
>+ pshufd m3, m3, q3120
>+ psubw m3, m2
>+ movh [dstq + %2], m3
>+%endmacro
>+
>+%macro FILTER_4TAP_HPS_sse3 2
>+INIT_XMM sse3
>+cglobal interp_4tap_horiz_ps_%1x%2, 4, 7, 6, src, srcstride, dst, dststride
>+ mov r4d, r4m
>+ add dststrided, dststrided
>+ mova m2, [pw_2000]
>+ pxor m0, m0
>+
>+%ifdef PIC
>+ lea r6, [tabw_ChromaCoeff]
>+ movddup m1, [r6 + r4 * 8]
>+%else
>+ movddup m1, [tabw_ChromaCoeff + r4 * 8]
>+%endif
>+
>+ mov r4d, %2
>+ cmp r5m, byte 0
>+ je .loopH
>+ sub srcq, srcstrideq
>+ add r4d, 3
>+
>+.loopH:
>+%assign x -1
>+%assign y 0
>+%rep %1/4
>+ FILTER_4 x,y
>+%assign x x+4
>+%assign y y+8
>+%endrep
>+%rep (%1 % 4)/2
>+ FILTER_2 x,y
>+%endrep
>+ add srcq, srcstrideq
>+ add dstq, dststrideq
>+
>+ dec r4d
>+ jnz .loopH
>+ RET
>+
>+%endmacro
>+
>+ FILTER_4TAP_HPS_sse3 2, 4
>+ FILTER_4TAP_HPS_sse3 2, 8
>+ FILTER_4TAP_HPS_sse3 2, 16
>+ FILTER_4TAP_HPS_sse3 4, 2
>+ FILTER_4TAP_HPS_sse3 4, 4
>+ FILTER_4TAP_HPS_sse3 4, 8
>+ FILTER_4TAP_HPS_sse3 4, 16
>+ FILTER_4TAP_HPS_sse3 4, 32
>+ FILTER_4TAP_HPS_sse3 6, 8
>+ FILTER_4TAP_HPS_sse3 6, 16
>+ FILTER_4TAP_HPS_sse3 8, 2
>+ FILTER_4TAP_HPS_sse3 8, 4
>+ FILTER_4TAP_HPS_sse3 8, 6
>+ FILTER_4TAP_HPS_sse3 8, 8
>+ FILTER_4TAP_HPS_sse3 8, 12
>+ FILTER_4TAP_HPS_sse3 8, 16
>+ FILTER_4TAP_HPS_sse3 8, 32
>+ FILTER_4TAP_HPS_sse3 8, 64
>+ FILTER_4TAP_HPS_sse3 12, 16
>+ FILTER_4TAP_HPS_sse3 12, 32
>+ FILTER_4TAP_HPS_sse3 16, 4
>+ FILTER_4TAP_HPS_sse3 16, 8
>+ FILTER_4TAP_HPS_sse3 16, 12
>+ FILTER_4TAP_HPS_sse3 16, 16
>+ FILTER_4TAP_HPS_sse3 16, 24
>+ FILTER_4TAP_HPS_sse3 16, 32
>+ FILTER_4TAP_HPS_sse3 16, 64
>+ FILTER_4TAP_HPS_sse3 24, 32
>+ FILTER_4TAP_HPS_sse3 24, 64
>+ FILTER_4TAP_HPS_sse3 32, 8
>+ FILTER_4TAP_HPS_sse3 32, 16
>+ FILTER_4TAP_HPS_sse3 32, 24
>+ FILTER_4TAP_HPS_sse3 32, 32
>+ FILTER_4TAP_HPS_sse3 32, 48
>+ FILTER_4TAP_HPS_sse3 32, 64
>+ FILTER_4TAP_HPS_sse3 48, 64
>+ FILTER_4TAP_HPS_sse3 64, 16
>+ FILTER_4TAP_HPS_sse3 64, 32
>+ FILTER_4TAP_HPS_sse3 64, 48
>+ FILTER_4TAP_HPS_sse3 64, 64
>+
> %macro FILTER_H8_W8_sse2 0
> movh m1, [r0 + x - 3]
> movh m4, [r0 + x - 2]
>diff -r 234bc93bd516 -r 4330ef5ddfcb source/common/x86/ipfilter8.h
>--- a/source/common/x86/ipfilter8.h Thu May 21 16:34:48 2015 +0530
>+++ b/source/common/x86/ipfilter8.h Thu May 21 19:24:07 2015 -0700
>@@ -854,6 +854,47 @@
> void x265_interp_4tap_horiz_pp_64x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_4tap_horiz_pp_64x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_4tap_horiz_pp_64x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+void x265_interp_4tap_horiz_ps_2x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_2x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_2x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_4x2_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_4x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_4x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_4x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_4x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_6x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_6x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x2_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x6_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x12_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_12x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_12x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_12x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x12_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x24_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_24x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_24x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x24_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x48_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_64x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_64x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_64x48_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_64x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_48x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
> void x265_interp_8tap_horiz_pp_4x4_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_8tap_horiz_pp_4x8_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_8tap_horiz_pp_4x16_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150522/a5b292c2/attachment-0001.html>
More information about the x265-devel mailing list