[x265] [PATCH] asm: interp_8tap_horiz pp and ps sse2
dtyx265 at gmail.com
dtyx265 at gmail.com
Tue Apr 28 03:05:06 CEST 2015
# HG changeset patch
# User David T Yuen <dtyx265 at gmail.com>
# Date 1430182995 25200
# Node ID 31b76bd430a47411f7b2ebaa7cfbb44e25c5ff60
# Parent 68a13226d586b335c02cade9311e093f0149c42a
asm: interp_8tap_horiz pp and ps sse2
This replaces the C code and covers the following luma block sizes:
4x4, 4x8, 4x16, 8x4, 8x8, 8x16, 8x32, 12x16, 16x4, 16x8, 16x12, 16x16, 16x32, 16x64,
24x32, 32x8, 32x16, 32x24, 32x32, 32x64, 48x64, 64x16, 64x32, 64x48, 64x64
64-bit
./test/TestBench --testbench interp | grep luma_h
luma_hpp[ 4x4] 1.86x 1852.87 3441.42
luma_hps[ 4x4] 1.78x 4662.80 8315.98
luma_hpp[ 8x8] 2.18x 6126.91 13358.12
luma_hps[ 8x8] 1.93x 11087.93 21351.43
luma_hpp[16x16] 2.54x 24023.32 61097.57
luma_hps[16x16] 2.27x 33064.12 74962.39
luma_hpp[32x32] 2.52x 96782.22 244302.42
luma_hps[32x32] 2.20x 111190.70 244826.36
luma_hpp[64x64] 2.52x 382848.38 965268.81
luma_hps[64x64] 2.31x 404641.47 933627.75
luma_hpp[ 8x4] 2.35x 3055.54 7175.84
luma_hps[ 8x4] 1.89x 8227.78 15575.28
luma_hpp[ 4x8] 1.87x 3690.05 6903.09
luma_hps[ 4x8] 1.81x 6222.84 11235.39
luma_hpp[ 16x8] 2.56x 12006.92 30678.69
luma_hps[ 16x8] 2.26x 21606.27 48876.84
luma_hpp[ 8x16] 2.21x 12225.59 26988.44
luma_hps[ 8x16] 1.92x 16809.11 32258.04
luma_hpp[32x16] 2.50x 47966.91 119978.61
luma_hps[32x16] 2.22x 65530.77 145648.20
luma_hpp[16x32] 2.53x 47943.95 121086.88
luma_hps[16x32] 2.32x 55757.34 129380.16
luma_hpp[64x32] 2.51x 192540.17 482519.84
luma_hps[64x32] 2.31x 223606.27 516736.66
luma_hpp[32x64] 2.52x 191115.05 481093.03
luma_hps[32x64] 2.23x 202603.52 452672.53
luma_hpp[16x12] 2.52x 18149.56 45765.54
luma_hps[16x12] 2.26x 27318.41 61835.58
luma_hpp[12x16] 2.29x 19368.50 44395.32
luma_hps[12x16] 1.85x 25962.37 48125.04
luma_hpp[ 16x4] 2.52x 6027.79 15214.09
luma_hps[ 16x4] 2.26x 15872.79 35917.86
luma_hpp[ 4x16] 1.99x 7470.23 14896.98
luma_hps[ 4x16] 1.83x 9342.87 17077.48
luma_hpp[32x24] 2.52x 71642.52 180756.81
luma_hps[32x24] 2.20x 88241.04 194519.33
luma_hpp[24x32] 2.54x 71766.33 182282.92
luma_hps[24x32] 2.24x 83465.88 187333.91
luma_hpp[ 32x8] 2.51x 23823.40 59883.32
luma_hps[ 32x8] 2.20x 42823.66 94268.15
luma_hpp[ 8x32] 2.38x 24316.01 57792.96
luma_hps[ 8x32] 1.93x 28384.35 54849.94
luma_hpp[64x48] 2.48x 287461.72 712744.88
luma_hps[64x48] 2.29x 313082.53 716684.25
luma_hpp[48x64] 2.53x 287235.50 725398.94
luma_hps[48x64] 2.10x 317556.03 667405.00
luma_hpp[64x16] 2.53x 95767.20 241838.22
luma_hps[64x16] 2.33x 130718.30 304524.62
luma_hpp[16x64] 2.57x 95334.80 244946.48
luma_hps[16x64] 2.28x 101269.19 231212.62
32-bit
./test/TestBench --testbench interp | grep luma_h
luma_hpp[ 4x4] 2.03x 1855.35 3763.12
luma_hps[ 4x4] 1.79x 4827.67 8630.31
luma_hpp[ 8x8] 2.34x 6185.32 14485.46
luma_hps[ 8x8] 1.93x 11082.82 21390.96
luma_hpp[16x16] 2.68x 24277.91 65107.63
luma_hps[16x16] 2.33x 32964.77 76937.46
luma_hpp[32x32] 2.61x 95937.99 250347.53
luma_hps[32x32] 2.24x 110881.09 248610.78
luma_hpp[64x64] 2.59x 384984.62 996606.31
luma_hps[64x64] 2.20x 405764.12 893234.75
luma_hpp[ 8x4] 2.28x 3180.56 7265.61
luma_hps[ 8x4] 1.92x 8242.61 15790.53
luma_hpp[ 4x8] 2.12x 3606.77 7652.88
luma_hps[ 4x8] 1.81x 6427.69 11630.39
luma_hpp[ 16x8] 2.67x 12140.81 32355.57
luma_hps[ 16x8] 2.33x 21643.76 50358.95
luma_hpp[ 8x16] 2.36x 12278.08 29025.54
luma_hps[ 8x16] 1.94x 16762.84 32590.21
luma_hpp[32x16] 2.62x 47968.25 125563.23
luma_hps[32x16] 2.25x 65772.64 147959.81
luma_hpp[16x32] 2.69x 48010.28 129074.63
luma_hps[16x32] 2.32x 56048.27 130077.62
luma_hpp[64x32] 2.57x 191772.20 493535.38
luma_hps[64x32] 2.22x 222292.73 493297.94
luma_hpp[32x64] 2.65x 191459.34 506724.47
luma_hps[32x64] 2.24x 202199.86 452178.41
luma_hpp[16x12] 2.67x 18317.57 48935.64
luma_hps[16x12] 2.33x 27407.98 63835.12
luma_hpp[12x16] 2.26x 19220.64 43485.26
luma_hps[12x16] 1.92x 25738.31 49392.66
luma_hpp[ 16x4] 2.82x 6157.95 17389.13
luma_hps[ 16x4] 2.32x 15962.58 37061.99
luma_hpp[ 4x16] 2.15x 7188.05 15453.23
luma_hps[ 4x16] 1.83x 9628.12 17630.33
luma_hpp[32x24] 2.59x 72228.04 187162.30
luma_hps[32x24] 2.23x 88585.02 197808.91
luma_hpp[24x32] 2.63x 72044.62 189704.78
luma_hps[24x32] 2.25x 83849.13 188660.86
luma_hpp[ 32x8] 2.60x 24121.99 62621.62
luma_hps[ 32x8] 2.22x 43095.41 95508.37
luma_hpp[ 8x32] 2.40x 24388.66 58462.95
luma_hps[ 8x32] 1.97x 28271.80 55649.31
luma_hpp[64x48] 2.64x 286337.97 756953.81
luma_hps[64x48] 2.19x 313365.66 687777.12
luma_hpp[48x64] 2.38x 310695.75 740744.94
luma_hps[48x64] 2.21x 302819.50 668183.75
luma_hpp[64x16] 2.58x 95900.21 247758.72
luma_hps[64x16] 2.17x 131831.64 286192.53
luma_hpp[16x64] 2.70x 95992.55 258870.31
luma_hps[16x64] 2.37x 101104.82 239193.94
diff -r 68a13226d586 -r 31b76bd430a4 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sat Apr 25 01:39:55 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp Mon Apr 27 18:03:15 2015 -0700
@@ -1340,6 +1340,10 @@
CHROMA_420_VSP_FILTERS(_sse2);
CHROMA_422_VSP_FILTERS(_sse2);
CHROMA_444_VSP_FILTERS(_sse2);
+ ALL_LUMA_PU(luma_hpp, interp_8tap_horiz_pp, sse2);
+ p.pu[LUMA_4x4].luma_hpp = x265_interp_8tap_horiz_pp_4x4_sse2;
+ ALL_LUMA_PU(luma_hps, interp_8tap_horiz_ps, sse2);
+ p.pu[LUMA_4x4].luma_hps = x265_interp_8tap_horiz_ps_4x4_sse2;
//p.frameInitLowres = x265_frame_init_lowres_core_mmx2;
p.frameInitLowres = x265_frame_init_lowres_core_sse2;
diff -r 68a13226d586 -r 31b76bd430a4 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Sat Apr 25 01:39:55 2015 -0500
+++ b/source/common/x86/ipfilter8.asm Mon Apr 27 18:03:15 2015 -0700
@@ -160,6 +160,11 @@
db -1, 4, -11, 40, 40, -11, 4, -1
db 0, 1, -5, 17, 58, -10, 4, -1
+tabw_LumaCoeff: dw 0, 0, 0, 64, 0, 0, 0, 0 ; 8-tap luma coefficients stored as words (pmaddwd operand); one 16-byte row per coeffIdx, row 0 = single tap of 64
+ dw -1, 4, -10, 58, 17, -5, 1, 0 ; coeffIdx 1
+ dw -1, 4, -11, 40, 40, -11, 4, -1 ; coeffIdx 2
+ dw 0, 1, -5, 17, 58, -10, 4, -1 ; coeffIdx 3
+
tab_LumaCoeffV: times 4 dw 0, 0
times 4 dw 0, 64
times 4 dw 0, 0
@@ -825,6 +830,230 @@
IPFILTER_CHROMA_W_sse3 48, 64
IPFILTER_CHROMA_W_sse3 64, 16
+%macro FILTER_H8_W8_sse2 0 ; 8-tap horizontal filter of 8 adjacent pixels at [r0 + r5]; in: m3 = 8 word coeffs, m6 = 0; out: m1 = 8 word sums; clobbers m0, m4, m5, m7
+ movh m1, [r0 + r5 - 3] ; source bytes for output pixel 0 (window starts 3 bytes left of the pixel)
+ movh m4, [r0 + r5 - 3 + 1] ; window for pixel 1
+ punpcklbw m1, m6 ; interleave with zero (m6): bytes become words
+ punpcklbw m4, m6
+ movh m5, [r0 + r5 - 3 + 2] ; window for pixel 2
+ movh m0, [r0 + r5 - 3 + 3] ; window for pixel 3
+ punpcklbw m5, m6
+ punpcklbw m0, m6
+ pmaddwd m1, m3 ; multiply by coeffs and add adjacent pairs: 4 dword partial sums per pixel
+ pmaddwd m4, m3
+ pmaddwd m5, m3
+ pmaddwd m0, m3
+ packssdw m1, m4 ; back to words: pixel 0 partials in the low half, pixel 1 in the high half
+ packssdw m5, m0 ; same layout for pixels 2 and 3
+ pshuflw m4, m1, q2301 ; copy with adjacent words swapped (low half)...
+ pshufhw m4, m4, q2301 ; ...and high half
+ pshuflw m0, m5, q2301
+ pshufhw m0, m0, q2301
+ paddw m1, m4 ; each word = sum of its adjacent pair (every pair sum appears twice)
+ paddw m5, m0
+ psrldq m1, 2 ; shift down one word so the distinct pair sums line up on dword boundaries
+ psrldq m5, 2
+ pshufd m1, m1, q3120 ; gather: low qword = the 4 distinct pair sums (2 per pixel)
+ pshufd m5, m5, q3120
+ punpcklqdq m1, m5 ; m1 = two half-sums for each of pixels 0..3
+ movh m7, [r0 + r5 - 3 + 4] ; repeat the same reduction for pixels 4..7, accumulating in m7
+ movh m4, [r0 + r5 - 3 + 5]
+ punpcklbw m7, m6
+ punpcklbw m4, m6
+ movh m5, [r0 + r5 - 3 + 6]
+ movh m0, [r0 + r5 - 3 + 7]
+ punpcklbw m5, m6
+ punpcklbw m0, m6
+ pmaddwd m7, m3
+ pmaddwd m4, m3
+ pmaddwd m5, m3
+ pmaddwd m0, m3
+ packssdw m7, m4
+ packssdw m5, m0
+ pshuflw m4, m7, q2301
+ pshufhw m4, m4, q2301
+ pshuflw m0, m5, q2301
+ pshufhw m0, m0, q2301
+ paddw m7, m4
+ paddw m5, m0
+ psrldq m7, 2
+ psrldq m5, 2
+ pshufd m7, m7, q3120
+ pshufd m5, m5, q3120
+ punpcklqdq m7, m5 ; m7 = two half-sums for each of pixels 4..7
+ pshuflw m4, m1, q2301 ; final stage: same swap/add/gather trick to add the two half-sums of every pixel
+ pshufhw m4, m4, q2301
+ pshuflw m0, m7, q2301
+ pshufhw m0, m0, q2301
+ paddw m1, m4
+ paddw m7, m0
+ psrldq m1, 2
+ psrldq m7, 2
+ pshufd m1, m1, q3120 ; low qword = full sums of pixels 0..3
+ pshufd m7, m7, q3120 ; low qword = full sums of pixels 4..7
+ punpcklqdq m1, m7 ; m1 = 8 filtered words, pixels 0..7 in order
+%endmacro
+
+%macro FILTER_H8_W4_sse2 0 ; 8-tap horizontal filter of 4 adjacent pixels at [r0 + r5]; in: m3 = 8 word coeffs, m6 = 0; out: low 4 words of m1; clobbers m0, m4, m5
+ movh m1, [r0 + r5 - 3] ; source bytes for output pixel 0 (window starts 3 bytes left of the pixel)
+ movh m0, [r0 + r5 + 1 - 3] ; window for pixel 1
+ punpcklbw m1, m6 ; interleave with zero (m6): bytes become words
+ punpcklbw m0, m6
+ movh m4, [r0 + r5 + 2 - 3] ; window for pixel 2
+ movh m5, [r0 + r5 + 3 - 3] ; window for pixel 3
+ punpcklbw m4, m6
+ punpcklbw m5, m6
+ pmaddwd m1, m3 ; multiply by coeffs and add adjacent pairs: 4 dword partial sums per pixel
+ pmaddwd m0, m3
+ pmaddwd m4, m3
+ pmaddwd m5, m3
+ packssdw m1, m0 ; back to words: pixel 0 partials in the low half, pixel 1 in the high half
+ packssdw m4, m5 ; same layout for pixels 2 and 3
+ pshuflw m0, m1, q2301 ; copy with adjacent words swapped (low half)...
+ pshufhw m0, m0, q2301 ; ...and high half
+ pshuflw m5, m4, q2301
+ pshufhw m5, m5, q2301
+ paddw m1, m0 ; each word = sum of its adjacent pair (every pair sum appears twice)
+ paddw m4, m5
+ psrldq m1, 2 ; shift down one word so the distinct pair sums line up on dword boundaries
+ psrldq m4, 2
+ pshufd m1, m1, q3120 ; gather: low qword = the 4 distinct pair sums (2 per pixel)
+ pshufd m4, m4, q3120
+ punpcklqdq m1, m4 ; m1 = two half-sums for each of pixels 0..3
+ pshuflw m0, m1, q2301 ; final stage: add the two half-sums of every pixel with the same trick
+ pshufhw m0, m0, q2301
+ paddw m1, m0
+ psrldq m1, 2
+ pshufd m1, m1, q3120 ; low qword = filtered pixels 0..3; the upper qword holds leftovers and must be ignored
+%endmacro
+
+;----------------------------------------------------------------------------------------------------------------------------
+; void interp_8tap_horiz_%3_%1x%2(pixel *src, intptr_t srcStride, pixel *dst (pp) / int16_t *dst (ps), intptr_t dstStride, int coeffIdx, int isRowExt (ps only))
+;----------------------------------------------------------------------------------------------------------------------------
+%macro IPFILTER_LUMA_sse2 3 ; %1 = block width, %2 = block height, %3 = pp (pixel output) or ps (16-bit intermediate output)
+INIT_XMM sse2
+cglobal interp_8tap_horiz_%3_%1x%2, 4,7,8
+ ; r0 = src, r1 = srcStride, r2 = dst, r3 = dstStride; coeffIdx (r4m) and isRowExt (r5m) are read from their argument slots
+ mov r4d, r4m
+ add r4d, r4d ; r4 = 2 * coeffIdx, so r4 * 8 below is the byte offset of one 16-byte coefficient row
+ pxor m6, m6 ; m6 = 0, used by the FILTER_H8_* macros to widen bytes to words
+%ifdef PIC
+ lea r6, [tabw_LumaCoeff]
+ movu m3, [r6 + r4 * 8] ; m3 = the 8 word coefficients; unaligned load, nothing here guarantees 16-byte alignment of the table
+%else
+ movu m3, [tabw_LumaCoeff + r4 * 8] ; m3 = the 8 word coefficients (unaligned load, see above)
+%endif
+
+ mov r4d, %2 ; r4 = number of rows to filter
+%ifidn %3, pp
+ mova m2, [pw_32] ; added before the >> 6 below (presumably 32 in every word, i.e. the rounding term)
+%else
+ mova m2, [pw_2000] ; offset subtracted from every ps output word
+ add r3, r3 ; dst holds 16-bit samples: dstStride in bytes; full width because the stride is intptr_t
+ cmp r5m, byte 0
+ je .loopH ; isRowExt == 0: filter only the %2 block rows
+ lea r6, [r1 + 2 * r1] ; r6 = 3 * srcStride
+ sub r0, r6 ; start 3 rows above src; full-width pointer math (a 32-bit r0d write would zero the upper 32 bits on x86-64)
+ add r4d, 7 ; 7 extra rows in total: the 3 above plus 4 below the block
+%endif
+
+.loopH:
+ xor r5d, r5d ; r5 = column offset within the current row
+%rep %1 / 8
+ FILTER_H8_W8_sse2 ; m1 = 8 filtered words for columns r5 .. r5+7
+ %ifidn %3, pp
+ paddw m1, m2
+ psraw m1, 6
+ packuswb m1, m1 ; saturate to unsigned bytes
+ movh [r2 + r5], m1 ; store 8 pixels
+ %else
+ psubw m1, m2
+ movu [r2 + 2 * r5], m1 ; store 8 words
+ %endif
+ add r5d, 8
+%endrep
+
+%rep (%1 % 8) / 4
+ FILTER_H8_W4_sse2 ; remaining 4 columns (width 4 or 12): low 4 words of m1
+ %ifidn %3, pp
+ paddw m1, m2
+ psraw m1, 6
+ packuswb m1, m1
+ movd [r2 + r5], m1 ; store 4 pixels
+ %else
+ psubw m1, m2
+ movh [r2 + 2 * r5], m1 ; store 4 words
+ %endif
+%endrep
+
+ add r0, r1 ; next source row; pointers must be advanced at full register width
+ add r2, r3 ; next destination row
+
+ dec r4d
+ jnz .loopH
+
+ RET
+
+%endmacro
+
+;--------------------------------------------------------------------------------------------------------------
+; void interp_8tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
+;--------------------------------------------------------------------------------------------------------------
+ IPFILTER_LUMA_sse2 4, 4, pp ; arguments: width, height, variant; each line emits interp_8tap_horiz_pp_<width>x<height>
+ IPFILTER_LUMA_sse2 4, 8, pp
+ IPFILTER_LUMA_sse2 8, 4, pp
+ IPFILTER_LUMA_sse2 8, 8, pp
+ IPFILTER_LUMA_sse2 16, 16, pp
+ IPFILTER_LUMA_sse2 16, 8, pp
+ IPFILTER_LUMA_sse2 8, 16, pp
+ IPFILTER_LUMA_sse2 16, 12, pp
+ IPFILTER_LUMA_sse2 12, 16, pp
+ IPFILTER_LUMA_sse2 16, 4, pp
+ IPFILTER_LUMA_sse2 4, 16, pp
+ IPFILTER_LUMA_sse2 32, 32, pp
+ IPFILTER_LUMA_sse2 32, 16, pp
+ IPFILTER_LUMA_sse2 16, 32, pp
+ IPFILTER_LUMA_sse2 32, 24, pp
+ IPFILTER_LUMA_sse2 24, 32, pp
+ IPFILTER_LUMA_sse2 32, 8, pp
+ IPFILTER_LUMA_sse2 8, 32, pp
+ IPFILTER_LUMA_sse2 64, 64, pp
+ IPFILTER_LUMA_sse2 64, 32, pp
+ IPFILTER_LUMA_sse2 32, 64, pp
+ IPFILTER_LUMA_sse2 64, 48, pp
+ IPFILTER_LUMA_sse2 48, 64, pp
+ IPFILTER_LUMA_sse2 64, 16, pp
+ IPFILTER_LUMA_sse2 16, 64, pp
+
+;----------------------------------------------------------------------------------------------------------------------------
+; void interp_8tap_horiz_ps_%1x%2(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx, int isRowExt)
+;----------------------------------------------------------------------------------------------------------------------------
+ IPFILTER_LUMA_sse2 4, 4, ps ; arguments: width, height, variant; each line emits interp_8tap_horiz_ps_<width>x<height>
+ IPFILTER_LUMA_sse2 8, 8, ps
+ IPFILTER_LUMA_sse2 8, 4, ps
+ IPFILTER_LUMA_sse2 4, 8, ps
+ IPFILTER_LUMA_sse2 16, 16, ps
+ IPFILTER_LUMA_sse2 16, 8, ps
+ IPFILTER_LUMA_sse2 8, 16, ps
+ IPFILTER_LUMA_sse2 16, 12, ps
+ IPFILTER_LUMA_sse2 12, 16, ps
+ IPFILTER_LUMA_sse2 16, 4, ps
+ IPFILTER_LUMA_sse2 4, 16, ps
+ IPFILTER_LUMA_sse2 32, 32, ps
+ IPFILTER_LUMA_sse2 32, 16, ps
+ IPFILTER_LUMA_sse2 16, 32, ps
+ IPFILTER_LUMA_sse2 32, 24, ps
+ IPFILTER_LUMA_sse2 24, 32, ps
+ IPFILTER_LUMA_sse2 32, 8, ps
+ IPFILTER_LUMA_sse2 8, 32, ps
+ IPFILTER_LUMA_sse2 64, 64, ps
+ IPFILTER_LUMA_sse2 64, 32, ps
+ IPFILTER_LUMA_sse2 32, 64, ps
+ IPFILTER_LUMA_sse2 64, 48, ps
+ IPFILTER_LUMA_sse2 48, 64, ps
+ IPFILTER_LUMA_sse2 64, 16, ps
+ IPFILTER_LUMA_sse2 16, 64, ps
+
;-----------------------------------------------------------------------------
; void interp_4tap_horiz_pp_2x4(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
;-----------------------------------------------------------------------------
diff -r 68a13226d586 -r 31b76bd430a4 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h Sat Apr 25 01:39:55 2015 -0500
+++ b/source/common/x86/ipfilter8.h Mon Apr 27 18:03:15 2015 -0700
@@ -846,6 +846,56 @@
void x265_interp_4tap_horiz_pp_64x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_4tap_horiz_pp_64x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_4tap_horiz_pp_64x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_4x4_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx); /* SSE2 8-tap horizontal luma filter, pixel in -> pixel out (pp); one entry point per WxH block size */
+void x265_interp_8tap_horiz_pp_4x8_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_4x16_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_8x4_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_8x8_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_8x16_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_8x32_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_12x16_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x4_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x8_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x12_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x16_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x32_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x64_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_24x32_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_32x8_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_32x16_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_32x24_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_32x32_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_32x64_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_48x64_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_64x16_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_64x32_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_64x48_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_64x64_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_ps_4x4_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); /* SSE2 8-tap horizontal luma filter, pixel in -> int16_t out (ps); isRowExt != 0 also filters 3 rows above and 4 below the block */
+void x265_interp_8tap_horiz_ps_4x8_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_4x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_8x4_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_8x8_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_8x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_8x32_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_12x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x4_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x8_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x12_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x32_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x64_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_24x32_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_32x8_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_32x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_32x24_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_32x32_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_32x64_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_48x64_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_64x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_64x32_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_64x48_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_64x64_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
#undef LUMA_FILTERS
#undef LUMA_SP_FILTERS
#undef LUMA_SS_FILTERS
More information about the x265-devel
mailing list