[x265] [PATCH] asm: interp_8tap_horiz pp and ps sse2
dtyx265 at gmail.com
dtyx265 at gmail.com
Wed Apr 29 04:15:23 CEST 2015
# HG changeset patch
# User David T Yuen <dtyx265 at gmail.com>
# Date 1430273620 25200
# Node ID 9b0181193b6a2c64dad26d1749fb6a0e6cf87240
# Parent e9df93f380664932e7d6c7e85b2cae16cd5e1dcd
asm: interp_8tap_horiz pp and ps sse2
This replaces the C code and covers
4x4, 4x8, 4x16, 8x4, 8x8, 8x16, 8x32, 12x16, 16x4, 16x8, 16x12, 16x16, 16x32, 16x64,
24x32, 32x8, 32x16, 32x24, 32x32, 32x64, 48x64, 64x16, 64x32, 64x48, 64x64
64-bit
./test/TestBench --testbench interp | grep luma_h
luma_hpp[ 4x4] 1.93x 1785.23 3440.44
luma_hps[ 4x4] 1.78x 4668.16 8315.97
luma_hpp[ 8x8] 2.20x 6085.12 13358.18
luma_hps[ 8x8] 1.93x 10966.76 21135.28
luma_hpp[16x16] 2.56x 23906.23 61087.05
luma_hps[16x16] 2.30x 32890.60 75619.12
luma_hpp[32x32] 2.59x 94246.86 244558.19
luma_hps[32x32] 2.21x 110899.23 244946.17
luma_hpp[64x64] 2.57x 376029.31 968213.06
luma_hps[64x64] 2.17x 426490.66 925922.69
luma_hpp[ 8x4] 2.37x 3027.70 7175.56
luma_hps[ 8x4] 1.92x 8132.61 15575.28
luma_hpp[ 4x8] 1.93x 3569.09 6902.47
luma_hps[ 4x8] 1.80x 6227.77 11237.01
luma_hpp[ 16x8] 2.57x 11910.59 30562.16
luma_hps[ 16x8] 2.26x 21583.07 48880.50
luma_hpp[ 8x16] 2.25x 12017.56 26988.45
luma_hps[ 8x16] 1.95x 16666.63 32456.38
luma_hpp[32x16] 2.54x 47397.47 120203.77
luma_hps[32x16] 2.20x 65652.99 144470.98
luma_hpp[16x32] 2.55x 47660.34 121469.73
luma_hps[16x32] 2.28x 55609.64 126999.02
luma_hpp[64x32] 2.55x 188588.95 481072.16
luma_hps[64x32] 2.27x 224091.56 508803.62
luma_hpp[32x64] 2.61x 188297.67 491043.78
luma_hps[32x64] 2.21x 202214.22 447216.84
luma_hpp[16x12] 2.54x 17978.03 45745.44
luma_hps[16x12] 2.30x 27224.89 62582.70
luma_hpp[12x16] 2.35x 18915.09 44403.39
luma_hps[12x16] 1.88x 25643.44 48312.90
luma_hpp[ 16x4] 2.55x 5968.94 15212.99
luma_hps[ 16x4] 2.27x 15878.00 36119.47
luma_hpp[ 4x16] 2.07x 7217.63 14905.95
luma_hps[ 4x16] 1.83x 9347.61 17077.63
luma_hpp[32x24] 2.55x 70702.66 179962.50
luma_hps[32x24] 2.21x 88203.63 194572.66
luma_hpp[24x32] 2.56x 70772.55 181361.48
luma_hps[24x32] 2.22x 83448.71 185463.17
luma_hpp[ 32x8] 2.55x 23670.56 60342.42
luma_hps[ 32x8] 2.20x 42789.34 94308.16
luma_hpp[ 8x32] 2.43x 23943.85 58233.07
luma_hps[ 8x32] 1.95x 27989.48 54596.67
luma_hpp[64x48] 2.53x 282044.56 712753.81
luma_hps[64x48] 2.29x 312788.00 717477.88
luma_hpp[48x64] 2.52x 282292.97 712709.81
luma_hps[48x64] 2.19x 302271.06 662558.75
luma_hpp[64x16] 2.52x 94289.28 237360.70
luma_hps[64x16] 2.29x 130756.29 299937.75
luma_hpp[16x64] 2.56x 94725.96 242905.03
luma_hps[16x64] 2.93x 100985.45 295985.50
32-bit
./test/TestBench --testbench interp | grep luma_h
luma_hpp[ 4x4] 2.03x 1857.68 3765.88
luma_hps[ 4x4] 1.86x 4635.35 8630.80
luma_hpp[ 8x8] 2.35x 6162.62 14481.97
luma_hps[ 8x8] 1.94x 11010.38 21393.63
luma_hpp[16x16] 2.71x 23932.59 64745.16
luma_hps[16x16] 2.32x 33022.04 76565.82
luma_hpp[32x32] 2.67x 94757.98 253195.48
luma_hps[32x32] 2.24x 110836.16 248253.83
luma_hpp[64x64] 2.68x 377921.97 1011001.75
luma_hps[64x64] 2.19x 404019.22 884795.44
luma_hpp[ 8x4] 2.34x 3110.22 7265.61
luma_hps[ 8x4] 1.93x 8190.16 15790.72
luma_hpp[ 4x8] 2.10x 3637.87 7653.13
luma_hps[ 4x8] 1.89x 6155.16 11629.97
luma_hpp[ 16x8] 2.73x 11997.50 32709.68
luma_hps[ 16x8] 2.32x 21585.27 50085.42
luma_hpp[ 8x16] 2.41x 12100.89 29205.80
luma_hps[ 8x16] 1.98x 16651.91 32906.20
luma_hpp[32x16] 2.67x 47288.30 126280.91
luma_hps[32x16] 2.23x 65470.68 146288.86
luma_hpp[16x32] 2.70x 47712.78 128709.99
luma_hps[16x32] 2.32x 56009.05 129910.73
luma_hpp[64x32] 2.59x 189383.70 491267.19
luma_hps[64x32] 2.20x 221908.06 487628.59
luma_hpp[32x64] 2.63x 189890.94 498590.69
luma_hps[32x64] 2.25x 202185.36 454678.72
luma_hpp[16x12] 2.68x 18059.54 48459.52
luma_hps[16x12] 2.33x 27225.24 63327.61
luma_hpp[12x16] 2.29x 18988.50 43505.35
luma_hps[12x16] 1.91x 25577.61 48899.88
luma_hpp[ 16x4] 2.87x 6060.81 17385.48
luma_hps[ 16x4] 2.31x 15945.44 36845.93
luma_hpp[ 4x16] 2.12x 7302.50 15455.59
luma_hps[ 4x16] 1.96x 8990.00 17630.94
luma_hpp[32x24] 2.64x 70776.92 186816.78
luma_hps[32x24] 2.23x 88215.06 197043.84
luma_hpp[24x32] 2.70x 71163.41 192404.27
luma_hps[24x32] 2.26x 83641.63 188625.94
luma_hpp[ 32x8] 2.66x 23679.80 63083.98
luma_hps[ 32x8] 2.23x 42923.36 95715.66
luma_hpp[ 8x32] 2.43x 24117.07 58612.02
luma_hps[ 8x32] 1.97x 28059.13 55385.55
luma_hpp[64x48] 2.60x 284101.56 739073.31
luma_hps[64x48] 2.20x 312840.66 688271.56
luma_hpp[48x64] 2.62x 283742.75 743053.19
luma_hps[48x64] 2.21x 304183.97 671124.94
luma_hpp[64x16] 2.59x 94719.38 245008.95
luma_hps[64x16] 2.18x 131164.03 285906.84
luma_hpp[16x64] 2.73x 95453.28 260661.61
luma_hps[16x64] 2.34x 101129.92 236882.66
diff -r e9df93f38066 -r 9b0181193b6a source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Apr 28 20:24:06 2015 +0800
+++ b/source/common/x86/asm-primitives.cpp Tue Apr 28 19:13:40 2015 -0700
@@ -1343,6 +1343,11 @@
CHROMA_422_VSP_FILTERS(_sse2);
CHROMA_444_VSP_FILTERS(_sse2);
+ ALL_LUMA_PU(luma_hpp, interp_8tap_horiz_pp, sse2);
+ p.pu[LUMA_4x4].luma_hpp = x265_interp_8tap_horiz_pp_4x4_sse2;
+ ALL_LUMA_PU(luma_hps, interp_8tap_horiz_ps, sse2);
+ p.pu[LUMA_4x4].luma_hps = x265_interp_8tap_horiz_ps_4x4_sse2;
+
//p.frameInitLowres = x265_frame_init_lowres_core_mmx2;
p.frameInitLowres = x265_frame_init_lowres_core_sse2;
diff -r e9df93f38066 -r 9b0181193b6a source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Tue Apr 28 20:24:06 2015 +0800
+++ b/source/common/x86/ipfilter8.asm Tue Apr 28 19:13:40 2015 -0700
@@ -151,6 +151,12 @@
db -1, 4, -11, 40, 40, -11, 4, -1
db 0, 1, -5, 17, 58, -10, 4, -1
+const tabw_LumaCoeff, dw 0, 0, 0, 64, 0, 0, 0, 0 ; coeffIdx 0: eight 16-bit taps per row (16 bytes), loaded whole into m3 by IPFILTER_LUMA_sse2
+ dw -1, 4, -10, 58, 17, -5, 1, 0 ; coeffIdx 1
+ dw -1, 4, -11, 40, 40, -11, 4, -1 ; coeffIdx 2
+ dw 0, 1, -5, 17, 58, -10, 4, -1 ; coeffIdx 3
+
+
const tab_LumaCoeffV, times 4 dw 0, 0
times 4 dw 0, 64
times 4 dw 0, 0
@@ -807,6 +813,233 @@
IPFILTER_CHROMA_W_sse3 48, 64
IPFILTER_CHROMA_W_sse3 64, 16
+%macro FILTER_H8_W8_sse2 0 ; 8-tap horizontal filter of 8 output pixels starting at [r0 + x]; in: m3 = eight word taps, m6 = 0; out: m1 = eight 16-bit sums; clobbers m0, m4, m5, m7
+ movh m1, [r0 + x - 3] ; the 8 source bytes feeding output pixel x+0
+ movh m4, [r0 + x - 2] ; ... output pixel x+1
+ punpcklbw m1, m6 ; interleave with zero: bytes -> words
+ punpcklbw m4, m6
+ movh m5, [r0 + x - 1] ; ... output pixel x+2
+ movh m0, [r0 + x] ; ... output pixel x+3
+ punpcklbw m5, m6
+ punpcklbw m0, m6
+ pmaddwd m1, m3 ; multiply by the taps and add adjacent pairs: 4 dword partial sums per pixel
+ pmaddwd m4, m3
+ pmaddwd m5, m3
+ pmaddwd m0, m3
+ packssdw m1, m4 ; narrow to words: partial sums of pixel 0 (low half) | pixel 1 (high half)
+ packssdw m5, m0 ; partial sums of pixel 2 | pixel 3
+ pshuflw m4, m1, q2301 ; copy with adjacent words swapped (low four words) ...
+ pshufhw m4, m4, q2301 ; ... and high four words
+ pshuflw m0, m5, q2301
+ pshufhw m0, m0, q2301
+ paddw m1, m4 ; each word pair now holds the sum of that pair (duplicated)
+ paddw m5, m0
+ psrldq m1, 2 ; shift down one word so distinct pair-sums land in even dwords
+ psrldq m5, 2
+ pshufd m1, m1, q3120 ; gather dwords 0 and 2 into the low qword: 2 half-sums each for pixels 0 and 1
+ pshufd m5, m5, q3120 ; same for pixels 2 and 3
+ punpcklqdq m1, m5 ; m1 = two half-sums for each of pixels 0..3
+ movh m7, [r0 + x + 1] ; second group: source bytes feeding output pixel x+4
+ movh m4, [r0 + x + 2] ; ... output pixel x+5
+ punpcklbw m7, m6
+ punpcklbw m4, m6
+ movh m5, [r0 + x + 3] ; ... output pixel x+6
+ movh m0, [r0 + x + 4] ; ... output pixel x+7
+ punpcklbw m5, m6
+ punpcklbw m0, m6
+ pmaddwd m7, m3
+ pmaddwd m4, m3
+ pmaddwd m5, m3
+ pmaddwd m0, m3
+ packssdw m7, m4
+ packssdw m5, m0
+ pshuflw m4, m7, q2301
+ pshufhw m4, m4, q2301
+ pshuflw m0, m5, q2301
+ pshufhw m0, m0, q2301
+ paddw m7, m4
+ paddw m5, m0
+ psrldq m7, 2
+ psrldq m5, 2
+ pshufd m7, m7, q3120
+ pshufd m5, m5, q3120
+ punpcklqdq m7, m5 ; m7 = two half-sums for each of pixels 4..7
+ pshuflw m4, m1, q2301 ; final reduction: repeat the swap/add/shift/gather step on the half-sums
+ pshufhw m4, m4, q2301
+ pshuflw m0, m7, q2301
+ pshufhw m0, m0, q2301
+ paddw m1, m4 ; full 8-tap sum of each pixel (duplicated per word pair)
+ paddw m7, m0
+ psrldq m1, 2
+ psrldq m7, 2
+ pshufd m1, m1, q3120 ; low qword of m1 = sums of pixels 0..3
+ pshufd m7, m7, q3120 ; low qword of m7 = sums of pixels 4..7
+ punpcklqdq m1, m7 ; m1 = sums of pixels 0..7, unrounded and unshifted
+%endmacro
+
+%macro FILTER_H8_W4_sse2 0 ; 8-tap horizontal filter of 4 output pixels starting at [r0 + x]; in: m3 = eight word taps, m6 = 0; out: low qword of m1 = four 16-bit sums; clobbers m0, m4, m5
+ movh m1, [r0 + x - 3] ; the 8 source bytes feeding output pixel x+0
+ movh m0, [r0 + x - 2] ; ... output pixel x+1
+ punpcklbw m1, m6 ; interleave with zero: bytes -> words
+ punpcklbw m0, m6
+ movh m4, [r0 + x - 1] ; ... output pixel x+2
+ movh m5, [r0 + x] ; ... output pixel x+3
+ punpcklbw m4, m6
+ punpcklbw m5, m6
+ pmaddwd m1, m3 ; multiply by the taps and add adjacent pairs: 4 dword partial sums per pixel
+ pmaddwd m0, m3
+ pmaddwd m4, m3
+ pmaddwd m5, m3
+ packssdw m1, m0 ; narrow to words: partial sums of pixel 0 | pixel 1
+ packssdw m4, m5 ; partial sums of pixel 2 | pixel 3
+ pshuflw m0, m1, q2301 ; copy with adjacent words swapped (low four words) ...
+ pshufhw m0, m0, q2301 ; ... and high four words
+ pshuflw m5, m4, q2301
+ pshufhw m5, m5, q2301
+ paddw m1, m0 ; each word pair now holds the sum of that pair (duplicated)
+ paddw m4, m5
+ psrldq m1, 2 ; shift down one word so distinct pair-sums land in even dwords
+ psrldq m4, 2
+ pshufd m1, m1, q3120 ; gather dwords 0 and 2 into the low qword
+ pshufd m4, m4, q3120
+ punpcklqdq m1, m4 ; m1 = two half-sums for each of pixels 0..3
+ pshuflw m0, m1, q2301 ; final reduction: same swap/add/shift/gather on the half-sums
+ pshufhw m0, m0, q2301
+ paddw m1, m0 ; full 8-tap sum of each pixel (duplicated per word pair)
+ psrldq m1, 2
+ pshufd m1, m1, q3120 ; low qword of m1 = sums of pixels 0..3, unrounded and unshifted
+%endmacro
+
+;----------------------------------------------------------------------------------------------------------------------------
+; void interp_8tap_horiz_%3_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx, int isRowExt)
+;----------------------------------------------------------------------------------------------------------------------------
+%macro IPFILTER_LUMA_sse2 3 ; %1 = block width, %2 = block height, %3 = pp (byte output) or ps (16-bit output, reads isRowExt)
+INIT_XMM sse2
+cglobal interp_8tap_horiz_%3_%1x%2, 4,6,8
+ mov r4d, r4m ; r4 = coeffIdx
+ add r4d, r4d ; r4 = 2 * coeffIdx; scaled by 8 below this gives 16 bytes per coefficient row
+ pxor m6, m6 ; m6 = 0, used by the FILTER_H8 macros to widen bytes to words
+
+%ifidn %3, ps
+ add r3, r3 ; dstStride in bytes for 16-bit output; native width because the stride is an intptr_t
+ cmp r5m, byte 0 ; test isRowExt here, before r5 is reused as scratch; the flags survive until the je below
+%endif
+
+%ifdef PIC
+ lea r5, [tabw_LumaCoeff]
+ movu m3, [r5 + r4 * 8] ; m3 = the eight word taps selected by coeffIdx
+%else
+ movu m3, [tabw_LumaCoeff + r4 * 8] ; m3 = the eight word taps selected by coeffIdx
+%endif
+
+ mov r4d, %2 ; r4 = number of rows to produce
+
+%ifidn %3, pp
+ mova m2, [pw_32] ; pp: constant added to each sum before the >> 6
+%else
+ mova m2, [pw_2000] ; ps: constant subtracted from each sum
+ je .loopH ; isRowExt == 0: filter exactly %2 rows
+ lea r5, [r1 + 2 * r1] ; r5 = 3 * srcStride
+ sub r0, r5 ; start 3 rows earlier; must be the full register, r0d would zero the upper pointer bits on x86-64
+ add r4d, 7 ; ... and produce 7 additional rows
+%endif
+
+.loopH:
+%assign x 0
+%rep %1 / 8
+ FILTER_H8_W8_sse2
+ %ifidn %3, pp
+ paddw m1, m2 ; add pw_32
+ psraw m1, 6 ; arithmetic shift right by 6
+ packuswb m1, m1 ; saturate to unsigned bytes
+ movh [r2 + x], m1 ; store 8 output bytes
+ %else
+ psubw m1, m2 ; subtract pw_2000
+ movu [r2 + 2 * x], m1 ; store 8 output words
+ %endif
+%assign x x+8
+%endrep
+
+%rep (%1 % 8) / 4
+ FILTER_H8_W4_sse2
+ %ifidn %3, pp
+ paddw m1, m2 ; add pw_32
+ psraw m1, 6 ; arithmetic shift right by 6
+ packuswb m1, m1 ; saturate to unsigned bytes
+ movd [r2 + x], m1 ; store the remaining 4 output bytes
+ %else
+ psubw m1, m2 ; subtract pw_2000
+ movh [r2 + 2 * x], m1 ; store the remaining 4 output words
+ %endif
+%endrep
+
+ add r0, r1 ; next source row; native width, a 32-bit add would truncate the pointer on x86-64
+ add r2, r3 ; next destination row; native width for the same reason
+
+ dec r4d
+ jnz .loopH
+ RET
+
+%endmacro
+
+;--------------------------------------------------------------------------------------------------------------
+; void interp_8tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
+;--------------------------------------------------------------------------------------------------------------
+ IPFILTER_LUMA_sse2 4, 4, pp
+ IPFILTER_LUMA_sse2 4, 8, pp
+ IPFILTER_LUMA_sse2 8, 4, pp
+ IPFILTER_LUMA_sse2 8, 8, pp
+ IPFILTER_LUMA_sse2 16, 16, pp
+ IPFILTER_LUMA_sse2 16, 8, pp
+ IPFILTER_LUMA_sse2 8, 16, pp
+ IPFILTER_LUMA_sse2 16, 12, pp
+ IPFILTER_LUMA_sse2 12, 16, pp
+ IPFILTER_LUMA_sse2 16, 4, pp
+ IPFILTER_LUMA_sse2 4, 16, pp
+ IPFILTER_LUMA_sse2 32, 32, pp
+ IPFILTER_LUMA_sse2 32, 16, pp
+ IPFILTER_LUMA_sse2 16, 32, pp
+ IPFILTER_LUMA_sse2 32, 24, pp
+ IPFILTER_LUMA_sse2 24, 32, pp
+ IPFILTER_LUMA_sse2 32, 8, pp
+ IPFILTER_LUMA_sse2 8, 32, pp
+ IPFILTER_LUMA_sse2 64, 64, pp
+ IPFILTER_LUMA_sse2 64, 32, pp
+ IPFILTER_LUMA_sse2 32, 64, pp
+ IPFILTER_LUMA_sse2 64, 48, pp
+ IPFILTER_LUMA_sse2 48, 64, pp
+ IPFILTER_LUMA_sse2 64, 16, pp
+ IPFILTER_LUMA_sse2 16, 64, pp
+
+;----------------------------------------------------------------------------------------------------------------------------
+; void interp_8tap_horiz_ps_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx, int isRowExt)
+;----------------------------------------------------------------------------------------------------------------------------
+ IPFILTER_LUMA_sse2 4, 4, ps
+ IPFILTER_LUMA_sse2 8, 8, ps
+ IPFILTER_LUMA_sse2 8, 4, ps
+ IPFILTER_LUMA_sse2 4, 8, ps
+ IPFILTER_LUMA_sse2 16, 16, ps
+ IPFILTER_LUMA_sse2 16, 8, ps
+ IPFILTER_LUMA_sse2 8, 16, ps
+ IPFILTER_LUMA_sse2 16, 12, ps
+ IPFILTER_LUMA_sse2 12, 16, ps
+ IPFILTER_LUMA_sse2 16, 4, ps
+ IPFILTER_LUMA_sse2 4, 16, ps
+ IPFILTER_LUMA_sse2 32, 32, ps
+ IPFILTER_LUMA_sse2 32, 16, ps
+ IPFILTER_LUMA_sse2 16, 32, ps
+ IPFILTER_LUMA_sse2 32, 24, ps
+ IPFILTER_LUMA_sse2 24, 32, ps
+ IPFILTER_LUMA_sse2 32, 8, ps
+ IPFILTER_LUMA_sse2 8, 32, ps
+ IPFILTER_LUMA_sse2 64, 64, ps
+ IPFILTER_LUMA_sse2 64, 32, ps
+ IPFILTER_LUMA_sse2 32, 64, ps
+ IPFILTER_LUMA_sse2 64, 48, ps
+ IPFILTER_LUMA_sse2 48, 64, ps
+ IPFILTER_LUMA_sse2 64, 16, ps
+ IPFILTER_LUMA_sse2 16, 64, ps
+
;-----------------------------------------------------------------------------
; void interp_4tap_horiz_pp_2x4(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
;-----------------------------------------------------------------------------
diff -r e9df93f38066 -r 9b0181193b6a source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h Tue Apr 28 20:24:06 2015 +0800
+++ b/source/common/x86/ipfilter8.h Tue Apr 28 19:13:40 2015 -0700
@@ -850,6 +850,56 @@
void x265_interp_4tap_horiz_pp_64x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_4tap_horiz_pp_64x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_4tap_horiz_pp_64x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_4x4_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_4x8_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_4x16_sse2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_8x4_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_8x8_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_8x16_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_8x32_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_12x16_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x4_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x8_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x12_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x16_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x32_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_16x64_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_24x32_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_32x8_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_32x16_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_32x24_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_32x32_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_32x64_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_48x64_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_64x16_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_64x32_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_64x48_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_pp_64x64_sse2(const pixel* src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_8tap_horiz_ps_4x4_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_4x8_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_4x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_8x4_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_8x8_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_8x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_8x32_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_12x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x4_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x8_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x12_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x32_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_16x64_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_24x32_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_32x8_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_32x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_32x24_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_32x32_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_32x64_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_48x64_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_64x16_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_64x32_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_64x48_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_8tap_horiz_ps_64x64_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
#undef LUMA_FILTERS
#undef LUMA_SP_FILTERS
#undef LUMA_SS_FILTERS
More information about the x265-devel
mailing list