[x265] [PATCH] asm: interp_4tap_horiz_ps sse3

chen chenm003 at 163.com
Fri May 22 04:28:48 CEST 2015


right now

At 2015-05-22 10:25:07,dtyx265 at gmail.com wrote:
># HG changeset patch
># User David T Yuen <dtyx265 at gmail.com>
># Date 1432261447 25200
># Node ID 4330ef5ddfcb64b1a621149fca0a4550c2a2f36f
># Parent  234bc93bd51698801fad77cc861177ed019f5113
>asm: interp_4tap_horiz_ps sse3
>
>This replaces c code for all of interp_4tap_horiz_ps for sse3
>
>64-bit
>
>./test/TestBench --testbench interp | grep chroma_hps
>chroma_hps[  4x4] 1.79x   1572.53    2815.09
>chroma_hps[  8x8] 1.79x   4885.74    8750.62
>chroma_hps[16x16] 1.78x   15859.17   28211.22
>chroma_hps[32x32] 4.36x   57531.00   250840.08
>chroma_hps[  4x2] 1.77x   1148.16    2035.62
>chroma_hps[  2x4] 1.67x   950.38     1585.96
>chroma_hps[  8x4] 1.86x   2972.69    5517.80
>chroma_hps[  4x8] 1.68x   2695.82    4522.98
>chroma_hps[ 16x8] 1.76x   9335.05    16452.60
>chroma_hps[ 8x16] 1.82x   8200.17    14912.77
>chroma_hps[32x16] 4.34x   31368.87   136237.14
>chroma_hps[16x32] 1.79x   29065.31   52099.11
>chroma_hps[  8x6] 1.85x   3842.61    7092.53
>chroma_hps[  6x8] 1.77x   3760.28    6662.62
>chroma_hps[  8x2] 1.85x   2145.25    3978.48
>chroma_hps[  2x8] 1.47x   1730.01    2545.01
>chroma_hps[16x12] 1.77x   12595.11   22332.64
>chroma_hps[12x16] 1.77x   12095.12   21372.96
>chroma_hps[ 16x4] 1.80x   5800.24    10418.40
>chroma_hps[ 4x16] 1.74x   4400.00    7643.22
>chroma_hps[32x24] 4.37x   44308.78   193781.08
>chroma_hps[24x32] 3.77x   43365.97   163699.78
>chroma_hps[ 32x8] 4.33x   18220.22   78915.17
>chroma_hps[ 8x32] 1.84x   14875.27   27300.11
>chroma_hps[  4x8] 1.68x   2700.06    4527.49
>chroma_hps[ 8x16] 1.82x   8200.08    14910.00
>chroma_hps[16x32] 1.79x   29000.16   51820.20
>chroma_hps[32x64] 4.38x   109560.25   479846.81
>chroma_hps[  4x4] 1.79x   1572.82    2815.21
>chroma_hps[  2x8] 1.47x   1730.10    2549.97
>chroma_hps[  8x8] 1.79x   4890.03    8749.99
>chroma_hps[ 4x16] 1.74x   4399.99    7647.49
>chroma_hps[16x16] 1.78x   15855.35   28211.13
>chroma_hps[ 8x32] 1.84x   14865.23   27300.26
>chroma_hps[32x32] 4.37x   57474.61   251064.52
>chroma_hps[16x64] 1.80x   55211.66   99111.59
>chroma_hps[ 8x12] 1.81x   6550.03    11827.51
>chroma_hps[ 6x16] 1.81x   6240.21    11299.97
>chroma_hps[  8x4] 1.86x   2972.75    5515.43
>chroma_hps[ 2x16] 1.59x   2661.93    4229.99
>chroma_hps[16x24] 1.79x   22375.11   39970.46
>chroma_hps[12x32] 1.78x   22055.13   39222.56
>chroma_hps[ 16x8] 1.76x   9334.98    16452.48
>chroma_hps[ 4x32] 1.76x   7910.12    13933.02
>chroma_hps[32x48] 4.38x   83471.63   365354.94
>chroma_hps[24x64] 3.79x   82642.20   312817.06
>chroma_hps[32x16] 4.37x   31278.96   136734.84
>chroma_hps[ 8x64] 1.86x   28032.05   52264.57
>chroma_hps[  4x4] 1.79x   1572.48    2810.34
>chroma_hps[  8x8] 1.79x   4890.07    8752.48
>chroma_hps[16x16] 1.78x   15856.51   28212.94
>chroma_hps[32x32] 4.36x   57629.55   251098.31
>chroma_hps[64x64] 4.48x   218639.95   980187.25
>chroma_hps[  8x4] 1.86x   2972.61    5514.96
>chroma_hps[  4x8] 1.68x   2694.99    4527.69
>chroma_hps[ 16x8] 1.76x   9330.05    16449.97
>chroma_hps[ 8x16] 1.82x   8195.16    14910.27
>chroma_hps[32x16] 4.37x   31269.81   136657.97
>chroma_hps[16x32] 1.80x   28982.21   52138.08
>chroma_hps[64x32] 4.43x   114447.45   506502.94
>chroma_hps[32x64] 4.38x   109630.23   479973.81
>chroma_hps[16x12] 1.77x   12590.04   22332.41
>chroma_hps[12x16] 1.77x   12090.26   21373.04
>chroma_hps[ 16x4] 1.80x   5795.19    10418.04
>chroma_hps[ 4x16] 1.74x   4395.00    7642.60
>chroma_hps[32x24] 4.38x   44240.22   193966.91
>chroma_hps[24x32] 3.77x   43525.50   163990.25
>chroma_hps[ 32x8] 4.35x   18219.50   79262.66
>chroma_hps[ 8x32] 1.85x   14875.31   27582.50
>chroma_hps[64x48] 4.51x   166509.48   750447.31
>chroma_hps[48x64] 4.37x   164116.52   717985.25
>chroma_hps[64x16] 4.42x   62320.70   275730.50
>chroma_hps[16x64] 1.79x   55302.99   99178.89
>
>32-bit
>
>./test/TestBench --testbench interp | grep chroma_hps
>chroma_hps[  4x4] 1.84x   1635.25    3002.56
>chroma_hps[  8x8] 1.81x   4950.23    8950.84
>chroma_hps[16x16] 1.82x   15833.09   28756.21
>chroma_hps[32x32] 2.85x   58060.22   165384.47
>chroma_hps[  4x2] 1.84x   1210.67    2223.03
>chroma_hps[  2x4] 1.64x   1060.56    1734.07
>chroma_hps[  8x4] 1.89x   3030.20    5722.98
>chroma_hps[  4x8] 1.69x   2769.99    4680.27
>chroma_hps[ 16x8] 1.78x   9352.70    16650.36
>chroma_hps[ 8x16] 1.84x   8260.39    15190.88
>chroma_hps[32x16] 2.88x   31450.75   90464.86
>chroma_hps[16x32] 1.81x   28938.98   52250.69
>chroma_hps[  8x6] 1.87x   3912.69    7312.47
>chroma_hps[  6x8] 1.73x   3837.58    6620.48
>chroma_hps[  8x2] 1.88x   2210.28    4163.42
>chroma_hps[  2x8] 1.51x   1785.43    2690.27
>chroma_hps[16x12] 1.79x   12595.43   22570.54
>chroma_hps[12x16] 1.81x   12033.56   21830.21
>chroma_hps[ 16x4] 1.82x   5835.04    10620.58
>chroma_hps[ 4x16] 1.74x   4471.59    7800.57
>chroma_hps[32x24] 2.87x   44425.31   127532.01
>chroma_hps[24x32] 2.91x   43241.68   125888.24
>chroma_hps[ 32x8] 2.85x   18332.81   52325.82
>chroma_hps[ 8x32] 1.86x   14912.98   27770.14
>chroma_hps[  4x8] 1.69x   2772.78    4680.42
>chroma_hps[ 8x16] 1.84x   8260.74    15192.68
>chroma_hps[16x32] 1.82x   29000.77   52737.20
>chroma_hps[32x64] 2.87x   110187.34   315945.28
>chroma_hps[  4x4] 1.83x   1640.56    3003.12
>chroma_hps[  2x8] 1.51x   1785.26    2689.93
>chroma_hps[  8x8] 1.81x   4950.69    8949.97
>chroma_hps[ 4x16] 1.74x   4470.17    7799.97
>chroma_hps[16x16] 1.80x   15835.98   28490.66
>chroma_hps[ 8x32] 1.86x   14912.46   27772.34
>chroma_hps[32x32] 2.87x   57640.21   165431.17
>chroma_hps[16x64] 1.81x   55077.57   99806.91
>chroma_hps[ 8x12] 1.83x   6610.44    12070.25
>chroma_hps[ 6x16] 1.77x   6318.38    11179.97
>chroma_hps[  8x4] 1.89x   3030.13    5720.32
>chroma_hps[ 2x16] 1.60x   2729.99    4370.72
>chroma_hps[16x24] 1.81x   22377.56   40517.34
>chroma_hps[12x32] 1.83x   21904.77   40024.01
>chroma_hps[ 16x8] 1.78x   9353.93    16651.38
>chroma_hps[ 4x32] 1.77x   7972.56    14099.94
>chroma_hps[32x48] 2.93x   83770.07   245542.20
>chroma_hps[24x64] 3.01x   82919.81   249421.81
>chroma_hps[32x16] 2.89x   31399.82   90718.93
>chroma_hps[ 8x64] 1.89x   28091.46   53014.97
>chroma_hps[  4x4] 1.84x   1635.45    3003.62
>chroma_hps[  8x8] 1.81x   4953.01    8950.02
>chroma_hps[16x16] 1.82x   15833.00   28808.78
>chroma_hps[32x32] 2.87x   57625.32   165363.50
>chroma_hps[64x64] 2.84x   220439.47   625157.88
>chroma_hps[  8x4] 1.89x   3030.37    5722.53
>chroma_hps[  4x8] 1.69x   2770.17    4681.64
>chroma_hps[ 16x8] 1.78x   9355.00    16651.06
>chroma_hps[ 8x16] 1.84x   8260.25    15190.74
>chroma_hps[32x16] 2.86x   31415.09   89991.48
>chroma_hps[16x32] 1.83x   28951.90   52869.09
>chroma_hps[64x32] 2.84x   114485.48   324925.31
>chroma_hps[32x64] 2.84x   111272.12   316104.81
>chroma_hps[16x12] 1.79x   12594.20   22572.86
>chroma_hps[12x16] 1.81x   12033.10   21829.97
>chroma_hps[ 16x4] 1.82x   5831.06    10621.60
>chroma_hps[ 4x16] 1.74x   4472.70    7800.57
>chroma_hps[32x24] 2.87x   44476.68   127812.44
>chroma_hps[24x32] 2.91x   43258.67   126067.69
>chroma_hps[ 32x8] 2.84x   18384.90   52140.39
>chroma_hps[ 8x32] 1.86x   14912.74   27770.40
>chroma_hps[64x48] 2.83x   168026.05   474962.94
>chroma_hps[48x64] 2.85x   165469.59   471339.94
>chroma_hps[64x16] 2.84x   62722.09   177881.62
>chroma_hps[16x64] 1.81x   55289.16   100343.34
>
>diff -r 234bc93bd516 -r 4330ef5ddfcb source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp Thu May 21 16:34:48 2015 +0530
>+++ b/source/common/x86/asm-primitives.cpp Thu May 21 19:24:07 2015 -0700
>@@ -1614,6 +1614,9 @@
>         ALL_CHROMA_420_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
>         ALL_CHROMA_422_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
>         ALL_CHROMA_444_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
>+        ALL_CHROMA_420_PU(filter_hps, interp_4tap_horiz_ps, sse3);
>+        ALL_CHROMA_422_PU(filter_hps, interp_4tap_horiz_ps, sse3);
>+        ALL_CHROMA_444_PU(filter_hps, interp_4tap_horiz_ps, sse3);
>     }
>     if (cpuMask & X265_CPU_SSSE3)
>     {
>diff -r 234bc93bd516 -r 4330ef5ddfcb source/common/x86/ipfilter8.asm
>--- a/source/common/x86/ipfilter8.asm Thu May 21 16:34:48 2015 +0530
>+++ b/source/common/x86/ipfilter8.asm Thu May 21 19:24:07 2015 -0700
>@@ -641,6 +641,123 @@
>     IPFILTER_CHROMA_sse3 48, 64
>     IPFILTER_CHROMA_sse3 64, 16
> 
>+%macro FILTER_2 2                       ; arg 1 = byte offset from srcq, arg 2 = byte offset from dstq; produces two 16-bit outputs
>+    movd        m3,     [srcq + %1]     ; load 4 source bytes s0..s3
>+    movd        m4,     [srcq + 1 + %1] ; load 4 source bytes s1..s4 (window of the next output)
>+    punpckldq   m3,     m4              ; low qword = s0..s3 | s1..s4
>+    punpcklbw   m3,     m0              ; interleave with m0 to widen bytes to words (the calling macro zeroes m0)
>+    pmaddwd     m3,     m1              ; multiply by the coefficient words in m1, adding adjacent pairs: 2 partial sums per output
>+    packssdw    m3,     m3              ; narrow the dword partial sums to words with signed saturation
>+    pshuflw     m4,     m3, q2301       ; copy of the low words with each adjacent pair swapped
>+    paddw       m3,     m4              ; every word now holds the sum of its pair, i.e. one full 4-tap sum (duplicated)
>+    psrldq      m3,     2               ; shift down one word so words 0 and 1 hold outputs 0 and 1
>+    psubw       m3,     m2              ; subtract the constant held in m2 (the calling macro loads pw_2000)
>+    movd        [dstq + %2], m3         ; store 4 bytes = two 16-bit results
>+%endmacro
>+
>+%macro FILTER_4 2                       ; arg 1 = byte offset from srcq, arg 2 = byte offset from dstq; produces four 16-bit outputs
>+    movd        m3,     [srcq + %1]     ; source bytes s0..s3 (window of output 0)
>+    movd        m4,     [srcq + 1 + %1] ; source bytes s1..s4 (window of output 1)
>+    punpckldq   m3,     m4              ; low qword = both windows
>+    punpcklbw   m3,     m0              ; widen bytes to words (the calling macro zeroes m0)
>+    pmaddwd     m3,     m1              ; partial sums for outputs 0 and 1 (two dwords each)
>+    movd        m4,     [srcq + 2 + %1] ; source bytes s2..s5 (window of output 2)
>+    movd        m5,     [srcq + 3 + %1] ; source bytes s3..s6 (window of output 3)
>+    punpckldq   m4,     m5              ; low qword = both windows
>+    punpcklbw   m4,     m0              ; widen bytes to words
>+    pmaddwd     m4,     m1              ; partial sums for outputs 2 and 3
>+    packssdw    m3,     m4              ; 8 words: two partial sums for each of the 4 outputs (signed saturation)
>+    pshuflw     m4,     m3, q2301       ; swap adjacent word pairs in the low half
>+    pshufhw     m4,     m4, q2301       ; ... and in the high half
>+    paddw       m3,     m4              ; words = P0,P0,P1,P1,P2,P2,P3,P3 (full 4-tap sums, duplicated)
>+    psrldq      m3,     2               ; shift down one word: P0,P1,P1,P2,P2,P3,P3,0
>+    pshufd      m3,     m3,     q3120   ; exchange the two middle dwords so the low qword is P0,P1,P2,P3
>+    psubw       m3,     m2              ; subtract the constant held in m2 (the calling macro loads pw_2000)
>+    movh        [dstq + %2], m3         ; store the low 8 bytes = four 16-bit results
>+%endmacro
>+
>+%macro FILTER_4TAP_HPS_sse3 2           ; arg 1 = block width, arg 2 = block height; emits interp_4tap_horiz_ps_<width>x<height>
>+INIT_XMM sse3
>+cglobal interp_4tap_horiz_ps_%1x%2, 4, 7, 6, src, srcstride, dst, dststride
>+    mov         r4d,    r4m             ; fifth argument (coeffIdx in the C prototype)
>+    add         dststrided, dststrided  ; double dststride; presumably converts a 16-bit element stride into bytes
>+    mova        m2,     [pw_2000]       ; constant that FILTER_2 / FILTER_4 subtract from every result
>+    pxor        m0,     m0              ; m0 = 0, used to widen source bytes to words
>+
>+%ifdef PIC
>+    lea         r6,     [tabw_ChromaCoeff]
>+    movddup     m1,     [r6 + r4 * 8]   ; 8 bytes of coefficients for this index, duplicated into both qwords of m1
>+%else
>+    movddup     m1,     [tabw_ChromaCoeff + r4 * 8] ; same coefficient load using an absolute address
>+%endif
>+
>+    mov        r4d,     %2              ; r4d is reused as the row counter, starting at the block height
>+    cmp        r5m,     byte 0          ; sixth argument (isRowExt in the C prototype)
>+    je         .loopH                   ; zero: filter only the rows of the block
>+    sub        srcq,    srcstrideq      ; non-zero: start one source row above the block ...
>+    add        r4d,     3               ; ... and filter three additional rows
>+
>+.loopH:                                 ; one pass per row: x = source offset (starts at -1), y = destination byte offset
>+%assign x -1
>+%assign y 0
>+%rep %1/4
>+    FILTER_4 x,y
>+%assign x x+4
>+%assign y y+8
>+%endrep
>+%rep (%1 % 4)/2
>+    FILTER_2 x,y
>+%endrep
>+    add         srcq,   srcstrideq      ; advance to the next source row
>+    add         dstq,   dststrideq      ; advance to the next destination row (stride was doubled above)
>+
>+    dec         r4d                     ; one row done
>+    jnz         .loopH                  ; loop until the row counter reaches zero
>+    RET
>+
>+%endmacro
>+
>+    FILTER_4TAP_HPS_sse3 2, 4           ; instantiate one function per block size; arguments are width, height
>+    FILTER_4TAP_HPS_sse3 2, 8
>+    FILTER_4TAP_HPS_sse3 2, 16
>+    FILTER_4TAP_HPS_sse3 4, 2
>+    FILTER_4TAP_HPS_sse3 4, 4
>+    FILTER_4TAP_HPS_sse3 4, 8
>+    FILTER_4TAP_HPS_sse3 4, 16
>+    FILTER_4TAP_HPS_sse3 4, 32
>+    FILTER_4TAP_HPS_sse3 6, 8
>+    FILTER_4TAP_HPS_sse3 6, 16
>+    FILTER_4TAP_HPS_sse3 8, 2
>+    FILTER_4TAP_HPS_sse3 8, 4
>+    FILTER_4TAP_HPS_sse3 8, 6
>+    FILTER_4TAP_HPS_sse3 8, 8
>+    FILTER_4TAP_HPS_sse3 8, 12
>+    FILTER_4TAP_HPS_sse3 8, 16
>+    FILTER_4TAP_HPS_sse3 8, 32
>+    FILTER_4TAP_HPS_sse3 8, 64
>+    FILTER_4TAP_HPS_sse3 12, 16
>+    FILTER_4TAP_HPS_sse3 12, 32
>+    FILTER_4TAP_HPS_sse3 16, 4
>+    FILTER_4TAP_HPS_sse3 16, 8
>+    FILTER_4TAP_HPS_sse3 16, 12
>+    FILTER_4TAP_HPS_sse3 16, 16
>+    FILTER_4TAP_HPS_sse3 16, 24
>+    FILTER_4TAP_HPS_sse3 16, 32
>+    FILTER_4TAP_HPS_sse3 16, 64
>+    FILTER_4TAP_HPS_sse3 24, 32
>+    FILTER_4TAP_HPS_sse3 24, 64
>+    FILTER_4TAP_HPS_sse3 32,  8
>+    FILTER_4TAP_HPS_sse3 32, 16
>+    FILTER_4TAP_HPS_sse3 32, 24
>+    FILTER_4TAP_HPS_sse3 32, 32
>+    FILTER_4TAP_HPS_sse3 32, 48
>+    FILTER_4TAP_HPS_sse3 32, 64
>+    FILTER_4TAP_HPS_sse3 48, 64
>+    FILTER_4TAP_HPS_sse3 64, 16
>+    FILTER_4TAP_HPS_sse3 64, 32
>+    FILTER_4TAP_HPS_sse3 64, 48
>+    FILTER_4TAP_HPS_sse3 64, 64
>+
> %macro FILTER_H8_W8_sse2 0
>     movh        m1, [r0 + x - 3]
>     movh        m4, [r0 + x - 2]
>diff -r 234bc93bd516 -r 4330ef5ddfcb source/common/x86/ipfilter8.h
>--- a/source/common/x86/ipfilter8.h Thu May 21 16:34:48 2015 +0530
>+++ b/source/common/x86/ipfilter8.h Thu May 21 19:24:07 2015 -0700
>@@ -854,6 +854,47 @@
> void x265_interp_4tap_horiz_pp_64x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_4tap_horiz_pp_64x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_4tap_horiz_pp_64x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>+/* SSE3 4-tap horizontal "ps" filters (pixel in, int16_t out), one per block size; a non-zero isRowExt also filters 1 row above and 2 rows below the block. */
>+void x265_interp_4tap_horiz_ps_2x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_2x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_2x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_4x2_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_4x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_4x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_4x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_4x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_6x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_6x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x2_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x6_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x12_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_8x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_12x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_12x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x12_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x24_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_16x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_24x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_24x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x24_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x48_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_32x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_64x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_64x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_64x48_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_64x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>+void x265_interp_4tap_horiz_ps_48x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
> void x265_interp_8tap_horiz_pp_4x4_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_8tap_horiz_pp_4x8_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> void x265_interp_8tap_horiz_pp_4x16_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150522/a5b292c2/attachment-0001.html>


More information about the x265-devel mailing list