[x265] [PATCH] asm: interp_4tap_horiz_ps sse3
dtyx265 at gmail.com
dtyx265 at gmail.com
Fri May 22 04:25:07 CEST 2015
# HG changeset patch
# User David T Yuen <dtyx265 at gmail.com>
# Date 1432261447 25200
# Node ID 4330ef5ddfcb64b1a621149fca0a4550c2a2f36f
# Parent 234bc93bd51698801fad77cc861177ed019f5113
asm: interp_4tap_horiz_ps sse3
This replaces the C code for all block sizes of interp_4tap_horiz_ps with SSE3 assembly.
64-bit build (columns: speedup, asm time, C time)
./test/TestBench --testbench interp | grep chroma_hps
chroma_hps[ 4x4] 1.79x 1572.53 2815.09
chroma_hps[ 8x8] 1.79x 4885.74 8750.62
chroma_hps[16x16] 1.78x 15859.17 28211.22
chroma_hps[32x32] 4.36x 57531.00 250840.08
chroma_hps[ 4x2] 1.77x 1148.16 2035.62
chroma_hps[ 2x4] 1.67x 950.38 1585.96
chroma_hps[ 8x4] 1.86x 2972.69 5517.80
chroma_hps[ 4x8] 1.68x 2695.82 4522.98
chroma_hps[ 16x8] 1.76x 9335.05 16452.60
chroma_hps[ 8x16] 1.82x 8200.17 14912.77
chroma_hps[32x16] 4.34x 31368.87 136237.14
chroma_hps[16x32] 1.79x 29065.31 52099.11
chroma_hps[ 8x6] 1.85x 3842.61 7092.53
chroma_hps[ 6x8] 1.77x 3760.28 6662.62
chroma_hps[ 8x2] 1.85x 2145.25 3978.48
chroma_hps[ 2x8] 1.47x 1730.01 2545.01
chroma_hps[16x12] 1.77x 12595.11 22332.64
chroma_hps[12x16] 1.77x 12095.12 21372.96
chroma_hps[ 16x4] 1.80x 5800.24 10418.40
chroma_hps[ 4x16] 1.74x 4400.00 7643.22
chroma_hps[32x24] 4.37x 44308.78 193781.08
chroma_hps[24x32] 3.77x 43365.97 163699.78
chroma_hps[ 32x8] 4.33x 18220.22 78915.17
chroma_hps[ 8x32] 1.84x 14875.27 27300.11
chroma_hps[ 4x8] 1.68x 2700.06 4527.49
chroma_hps[ 8x16] 1.82x 8200.08 14910.00
chroma_hps[16x32] 1.79x 29000.16 51820.20
chroma_hps[32x64] 4.38x 109560.25 479846.81
chroma_hps[ 4x4] 1.79x 1572.82 2815.21
chroma_hps[ 2x8] 1.47x 1730.10 2549.97
chroma_hps[ 8x8] 1.79x 4890.03 8749.99
chroma_hps[ 4x16] 1.74x 4399.99 7647.49
chroma_hps[16x16] 1.78x 15855.35 28211.13
chroma_hps[ 8x32] 1.84x 14865.23 27300.26
chroma_hps[32x32] 4.37x 57474.61 251064.52
chroma_hps[16x64] 1.80x 55211.66 99111.59
chroma_hps[ 8x12] 1.81x 6550.03 11827.51
chroma_hps[ 6x16] 1.81x 6240.21 11299.97
chroma_hps[ 8x4] 1.86x 2972.75 5515.43
chroma_hps[ 2x16] 1.59x 2661.93 4229.99
chroma_hps[16x24] 1.79x 22375.11 39970.46
chroma_hps[12x32] 1.78x 22055.13 39222.56
chroma_hps[ 16x8] 1.76x 9334.98 16452.48
chroma_hps[ 4x32] 1.76x 7910.12 13933.02
chroma_hps[32x48] 4.38x 83471.63 365354.94
chroma_hps[24x64] 3.79x 82642.20 312817.06
chroma_hps[32x16] 4.37x 31278.96 136734.84
chroma_hps[ 8x64] 1.86x 28032.05 52264.57
chroma_hps[ 4x4] 1.79x 1572.48 2810.34
chroma_hps[ 8x8] 1.79x 4890.07 8752.48
chroma_hps[16x16] 1.78x 15856.51 28212.94
chroma_hps[32x32] 4.36x 57629.55 251098.31
chroma_hps[64x64] 4.48x 218639.95 980187.25
chroma_hps[ 8x4] 1.86x 2972.61 5514.96
chroma_hps[ 4x8] 1.68x 2694.99 4527.69
chroma_hps[ 16x8] 1.76x 9330.05 16449.97
chroma_hps[ 8x16] 1.82x 8195.16 14910.27
chroma_hps[32x16] 4.37x 31269.81 136657.97
chroma_hps[16x32] 1.80x 28982.21 52138.08
chroma_hps[64x32] 4.43x 114447.45 506502.94
chroma_hps[32x64] 4.38x 109630.23 479973.81
chroma_hps[16x12] 1.77x 12590.04 22332.41
chroma_hps[12x16] 1.77x 12090.26 21373.04
chroma_hps[ 16x4] 1.80x 5795.19 10418.04
chroma_hps[ 4x16] 1.74x 4395.00 7642.60
chroma_hps[32x24] 4.38x 44240.22 193966.91
chroma_hps[24x32] 3.77x 43525.50 163990.25
chroma_hps[ 32x8] 4.35x 18219.50 79262.66
chroma_hps[ 8x32] 1.85x 14875.31 27582.50
chroma_hps[64x48] 4.51x 166509.48 750447.31
chroma_hps[48x64] 4.37x 164116.52 717985.25
chroma_hps[64x16] 4.42x 62320.70 275730.50
chroma_hps[16x64] 1.79x 55302.99 99178.89
32-bit build (columns: speedup, asm time, C time)
./test/TestBench --testbench interp | grep chroma_hps
chroma_hps[ 4x4] 1.84x 1635.25 3002.56
chroma_hps[ 8x8] 1.81x 4950.23 8950.84
chroma_hps[16x16] 1.82x 15833.09 28756.21
chroma_hps[32x32] 2.85x 58060.22 165384.47
chroma_hps[ 4x2] 1.84x 1210.67 2223.03
chroma_hps[ 2x4] 1.64x 1060.56 1734.07
chroma_hps[ 8x4] 1.89x 3030.20 5722.98
chroma_hps[ 4x8] 1.69x 2769.99 4680.27
chroma_hps[ 16x8] 1.78x 9352.70 16650.36
chroma_hps[ 8x16] 1.84x 8260.39 15190.88
chroma_hps[32x16] 2.88x 31450.75 90464.86
chroma_hps[16x32] 1.81x 28938.98 52250.69
chroma_hps[ 8x6] 1.87x 3912.69 7312.47
chroma_hps[ 6x8] 1.73x 3837.58 6620.48
chroma_hps[ 8x2] 1.88x 2210.28 4163.42
chroma_hps[ 2x8] 1.51x 1785.43 2690.27
chroma_hps[16x12] 1.79x 12595.43 22570.54
chroma_hps[12x16] 1.81x 12033.56 21830.21
chroma_hps[ 16x4] 1.82x 5835.04 10620.58
chroma_hps[ 4x16] 1.74x 4471.59 7800.57
chroma_hps[32x24] 2.87x 44425.31 127532.01
chroma_hps[24x32] 2.91x 43241.68 125888.24
chroma_hps[ 32x8] 2.85x 18332.81 52325.82
chroma_hps[ 8x32] 1.86x 14912.98 27770.14
chroma_hps[ 4x8] 1.69x 2772.78 4680.42
chroma_hps[ 8x16] 1.84x 8260.74 15192.68
chroma_hps[16x32] 1.82x 29000.77 52737.20
chroma_hps[32x64] 2.87x 110187.34 315945.28
chroma_hps[ 4x4] 1.83x 1640.56 3003.12
chroma_hps[ 2x8] 1.51x 1785.26 2689.93
chroma_hps[ 8x8] 1.81x 4950.69 8949.97
chroma_hps[ 4x16] 1.74x 4470.17 7799.97
chroma_hps[16x16] 1.80x 15835.98 28490.66
chroma_hps[ 8x32] 1.86x 14912.46 27772.34
chroma_hps[32x32] 2.87x 57640.21 165431.17
chroma_hps[16x64] 1.81x 55077.57 99806.91
chroma_hps[ 8x12] 1.83x 6610.44 12070.25
chroma_hps[ 6x16] 1.77x 6318.38 11179.97
chroma_hps[ 8x4] 1.89x 3030.13 5720.32
chroma_hps[ 2x16] 1.60x 2729.99 4370.72
chroma_hps[16x24] 1.81x 22377.56 40517.34
chroma_hps[12x32] 1.83x 21904.77 40024.01
chroma_hps[ 16x8] 1.78x 9353.93 16651.38
chroma_hps[ 4x32] 1.77x 7972.56 14099.94
chroma_hps[32x48] 2.93x 83770.07 245542.20
chroma_hps[24x64] 3.01x 82919.81 249421.81
chroma_hps[32x16] 2.89x 31399.82 90718.93
chroma_hps[ 8x64] 1.89x 28091.46 53014.97
chroma_hps[ 4x4] 1.84x 1635.45 3003.62
chroma_hps[ 8x8] 1.81x 4953.01 8950.02
chroma_hps[16x16] 1.82x 15833.00 28808.78
chroma_hps[32x32] 2.87x 57625.32 165363.50
chroma_hps[64x64] 2.84x 220439.47 625157.88
chroma_hps[ 8x4] 1.89x 3030.37 5722.53
chroma_hps[ 4x8] 1.69x 2770.17 4681.64
chroma_hps[ 16x8] 1.78x 9355.00 16651.06
chroma_hps[ 8x16] 1.84x 8260.25 15190.74
chroma_hps[32x16] 2.86x 31415.09 89991.48
chroma_hps[16x32] 1.83x 28951.90 52869.09
chroma_hps[64x32] 2.84x 114485.48 324925.31
chroma_hps[32x64] 2.84x 111272.12 316104.81
chroma_hps[16x12] 1.79x 12594.20 22572.86
chroma_hps[12x16] 1.81x 12033.10 21829.97
chroma_hps[ 16x4] 1.82x 5831.06 10621.60
chroma_hps[ 4x16] 1.74x 4472.70 7800.57
chroma_hps[32x24] 2.87x 44476.68 127812.44
chroma_hps[24x32] 2.91x 43258.67 126067.69
chroma_hps[ 32x8] 2.84x 18384.90 52140.39
chroma_hps[ 8x32] 1.86x 14912.74 27770.40
chroma_hps[64x48] 2.83x 168026.05 474962.94
chroma_hps[48x64] 2.85x 165469.59 471339.94
chroma_hps[64x16] 2.84x 62722.09 177881.62
chroma_hps[16x64] 1.81x 55289.16 100343.34
diff -r 234bc93bd516 -r 4330ef5ddfcb source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu May 21 16:34:48 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu May 21 19:24:07 2015 -0700
@@ -1614,6 +1614,9 @@
ALL_CHROMA_420_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
ALL_CHROMA_422_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
ALL_CHROMA_444_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
+ ALL_CHROMA_420_PU(filter_hps, interp_4tap_horiz_ps, sse3);
+ ALL_CHROMA_422_PU(filter_hps, interp_4tap_horiz_ps, sse3);
+ ALL_CHROMA_444_PU(filter_hps, interp_4tap_horiz_ps, sse3);
}
if (cpuMask & X265_CPU_SSSE3)
{
diff -r 234bc93bd516 -r 4330ef5ddfcb source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Thu May 21 16:34:48 2015 +0530
+++ b/source/common/x86/ipfilter8.asm Thu May 21 19:24:07 2015 -0700
@@ -641,6 +641,123 @@
IPFILTER_CHROMA_sse3 48, 64
IPFILTER_CHROMA_sse3 64, 16
+%macro FILTER_2 2 ; %1 = byte offset from srcq, %2 = byte offset from dstq; produces 2 outputs
+ movd m3, [srcq + %1] ; 4 source bytes starting at offset %1 (window for output 0)
+ movd m4, [srcq + 1 + %1] ; the same window shifted by one byte (window for output 1)
+ punpckldq m3, m4 ; low 8 bytes = window 0 followed by window 1
+ punpcklbw m3, m0 ; interleave with m0 (zeroed by the caller) -> 8 zero-extended words
+ pmaddwd m3, m1 ; multiply by the coefficient words in m1; each window yields 2 dword partial sums
+ packssdw m3, m3 ; saturate the partial sums to words (same 4 words in both halves)
+ pshuflw m4, m3, q2301 ; swap the two partial sums of each window in the low 4 words
+ paddw m3, m4 ; low words are now s0 s0 s1 s1 (complete 4-tap sums)
+ psrldq m3, 2 ; shift right by one word so words 0 and 1 are s0 and s1
+ psubw m3, m2 ; subtract the words in m2 (loaded from pw_2000 by the caller)
+ movd [dstq + %2], m3 ; store the two 16-bit results
+%endmacro
+
+%macro FILTER_4 2 ; %1 = byte offset from srcq, %2 = byte offset from dstq; produces 4 outputs
+ movd m3, [srcq + %1] ; 4 source bytes: window for output 0
+ movd m4, [srcq + 1 + %1] ; window for output 1
+ punpckldq m3, m4 ; pack both windows into the low 8 bytes
+ punpcklbw m3, m0 ; interleave with m0 (zeroed by the caller) -> 8 zero-extended words
+ pmaddwd m3, m1 ; multiply by the coefficient words in m1; 2 dword partial sums per window
+ movd m4, [srcq + 2 + %1] ; window for output 2
+ movd m5, [srcq + 3 + %1] ; window for output 3
+ punpckldq m4, m5 ; same packing for outputs 2 and 3
+ punpcklbw m4, m0 ; zero-extend to words
+ pmaddwd m4, m1 ; partial sums for outputs 2 and 3
+ packssdw m3, m4 ; 8 words: the partial-sum pairs of outputs 0, 1, 2, 3
+ pshuflw m4, m3, q2301 ; swap each pair in the low half ...
+ pshufhw m4, m4, q2301 ; ... and in the high half
+ paddw m3, m4 ; words are now s0 s0 s1 s1 s2 s2 s3 s3 (complete 4-tap sums)
+ psrldq m3, 2 ; shift right by one word: s0 s1 s1 s2 s2 s3 s3 0
+ pshufd m3, m3, q3120 ; swap the two middle dwords so the low 4 words are s0 s1 s2 s3
+ psubw m3, m2 ; subtract the words in m2 (loaded from pw_2000 by the caller)
+ movh [dstq + %2], m3 ; store the low half: the four 16-bit results
+%endmacro
+
+%macro FILTER_4TAP_HPS_sse3 2 ; %1 = block width, %2 = block height; emits interp_4tap_horiz_ps_%1x%2
+INIT_XMM sse3
+cglobal interp_4tap_horiz_ps_%1x%2, 4, 7, 6, src, srcstride, dst, dststride
+ mov r4d, r4m ; fifth argument (coeffIdx in the C prototype)
+ add dststrided, dststrided ; double dststride: dst is int16_t* in the C prototype, so this converts elements to bytes
+ mova m2, [pw_2000] ; constant that FILTER_2/FILTER_4 subtract from every result
+ pxor m0, m0 ; zero register used by FILTER_2/FILTER_4 for byte -> word unpacking
+
+%ifdef PIC
+ lea r6, [tabw_ChromaCoeff] ; PIC build: take the table address into a register first
+ movddup m1, [r6 + r4 * 8] ; 8 bytes at table + coeffIdx * 8, duplicated into both halves of m1
+%else
+ movddup m1, [tabw_ChromaCoeff + r4 * 8] ; 8 bytes at table + coeffIdx * 8, duplicated into both halves of m1
+%endif
+
+ mov r4d, %2 ; r4d is reused as the row counter, starting at the block height
+ cmp r5m, byte 0 ; sixth argument (isRowExt in the C prototype)
+ je .loopH
+ sub srcq, srcstrideq ; isRowExt != 0: start one row above src ...
+ add r4d, 3 ; ... and process 3 additional rows
+
+.loopH:
+%assign x -1
+%assign y 0
+%rep %1/4
+ FILTER_4 x,y ; 4 outputs per step; x starts at -1 and advances 4 source bytes, y advances 8 destination bytes
+%assign x x+4
+%assign y y+8
+%endrep
+%rep (%1 % 4)/2
+ FILTER_2 x,y ; remaining 2 columns when the width is not a multiple of 4
+%endrep
+ add srcq, srcstrideq ; next source row
+ add dstq, dststrideq ; next destination row (stride was doubled above)
+
+ dec r4d
+ jnz .loopH
+ RET
+
+%endmacro
+
+ FILTER_4TAP_HPS_sse3 2, 4 ; one interp_4tap_horiz_ps_WxH kernel is emitted per (width, height) pair listed below
+ FILTER_4TAP_HPS_sse3 2, 8
+ FILTER_4TAP_HPS_sse3 2, 16
+ FILTER_4TAP_HPS_sse3 4, 2
+ FILTER_4TAP_HPS_sse3 4, 4
+ FILTER_4TAP_HPS_sse3 4, 8
+ FILTER_4TAP_HPS_sse3 4, 16
+ FILTER_4TAP_HPS_sse3 4, 32
+ FILTER_4TAP_HPS_sse3 6, 8
+ FILTER_4TAP_HPS_sse3 6, 16
+ FILTER_4TAP_HPS_sse3 8, 2
+ FILTER_4TAP_HPS_sse3 8, 4
+ FILTER_4TAP_HPS_sse3 8, 6
+ FILTER_4TAP_HPS_sse3 8, 8
+ FILTER_4TAP_HPS_sse3 8, 12
+ FILTER_4TAP_HPS_sse3 8, 16
+ FILTER_4TAP_HPS_sse3 8, 32
+ FILTER_4TAP_HPS_sse3 8, 64
+ FILTER_4TAP_HPS_sse3 12, 16
+ FILTER_4TAP_HPS_sse3 12, 32
+ FILTER_4TAP_HPS_sse3 16, 4
+ FILTER_4TAP_HPS_sse3 16, 8
+ FILTER_4TAP_HPS_sse3 16, 12
+ FILTER_4TAP_HPS_sse3 16, 16
+ FILTER_4TAP_HPS_sse3 16, 24
+ FILTER_4TAP_HPS_sse3 16, 32
+ FILTER_4TAP_HPS_sse3 16, 64
+ FILTER_4TAP_HPS_sse3 24, 32
+ FILTER_4TAP_HPS_sse3 24, 64
+ FILTER_4TAP_HPS_sse3 32, 8
+ FILTER_4TAP_HPS_sse3 32, 16
+ FILTER_4TAP_HPS_sse3 32, 24
+ FILTER_4TAP_HPS_sse3 32, 32
+ FILTER_4TAP_HPS_sse3 32, 48
+ FILTER_4TAP_HPS_sse3 32, 64
+ FILTER_4TAP_HPS_sse3 48, 64
+ FILTER_4TAP_HPS_sse3 64, 16
+ FILTER_4TAP_HPS_sse3 64, 32
+ FILTER_4TAP_HPS_sse3 64, 48
+ FILTER_4TAP_HPS_sse3 64, 64
+
%macro FILTER_H8_W8_sse2 0
movh m1, [r0 + x - 3]
movh m4, [r0 + x - 2]
diff -r 234bc93bd516 -r 4330ef5ddfcb source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h Thu May 21 16:34:48 2015 +0530
+++ b/source/common/x86/ipfilter8.h Thu May 21 19:24:07 2015 -0700
@@ -854,6 +854,47 @@
void x265_interp_4tap_horiz_pp_64x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_4tap_horiz_pp_64x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_4tap_horiz_pp_64x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+/* SSE3 interp_4tap_horiz_ps kernels, one per block size: pixel source, int16_t destination; a non-zero isRowExt makes the kernel start one row above src and process 3 extra rows. */
+void x265_interp_4tap_horiz_ps_2x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_2x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_2x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_4x2_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_4x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_4x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_4x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_4x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_6x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_6x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x2_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x6_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x12_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_12x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_12x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x4_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x12_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x24_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_24x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_24x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x8_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x24_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x48_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_64x16_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_64x32_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_64x48_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_64x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_48x64_sse3(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
void x265_interp_8tap_horiz_pp_4x4_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_8tap_horiz_pp_4x8_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_8tap_horiz_pp_4x16_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
More information about the x265-devel
mailing list