[x265] [PATCH] asm: interp_4tap_horiz_pp sse3
dtyx265 at gmail.com
dtyx265 at gmail.com
Wed Apr 22 03:13:47 CEST 2015
# HG changeset patch
# User David T Yuen <dtyx265 at gmail.com>
# Date 1429665160 25200
# Node ID defd1cf26749f3395750ef9128c9a90bfa2caf78
# Parent c135c117ffb083a00d4353279ea669e8f3f7a8ee
asm: interp_4tap_horiz_pp sse3
This replaces C code for 6x8, 6x16, 8x2, 8x4, 8x6, 8x8, 8x12, 8x16, 8x32, 8x64, 12x16, 12x32, 16x4, 16x8, 16x12,
16x16, 16x24, 16x32, 16x64, 24x32, 24x64, 32x8, 32x16, 32x24, 32x32, 32x48, 32x64, 48x64, 64x16, 64x32,
64x48, 64x64
Macros are used to add the primitives to asm-primitives.cpp
64-bit
./test/TestBench --testbench interp | grep hpp
chroma_hpp[ 8x8] 3.01x 3090.03 9315.59
chroma_hpp[16x16] 3.12x 11813.55 36843.73
chroma_hpp[32x32] 3.48x 46795.53 162656.64
chroma_hpp[ 8x4] 2.94x 1570.01 4616.06
chroma_hpp[ 16x8] 3.12x 5935.00 18531.73
chroma_hpp[ 8x16] 3.02x 6132.50 18522.87
chroma_hpp[32x16] 3.45x 23332.54 80401.67
chroma_hpp[16x32] 3.08x 23632.81 72831.01
chroma_hpp[ 8x6] 2.93x 2340.00 6865.84
chroma_hpp[ 6x8] 2.52x 2810.02 7078.24
chroma_hpp[ 8x2] 2.26x 812.50 1832.25
chroma_hpp[16x12] 3.10x 8874.99 27547.49
chroma_hpp[12x16] 2.88x 9537.65 27476.47
chroma_hpp[ 16x4] 3.05x 2995.00 9139.82
chroma_hpp[32x24] 3.44x 35116.93 120804.25
chroma_hpp[24x32] 3.49x 35175.14 122591.48
chroma_hpp[ 32x8] 3.46x 11692.55 40400.35
chroma_hpp[ 8x32] 2.99x 12239.74 36603.12
chroma_hpp[ 8x16] 3.02x 6132.29 18520.52
chroma_hpp[16x32] 3.08x 23706.51 73120.24
chroma_hpp[32x64] 3.44x 93688.69 322076.91
chroma_hpp[ 8x8] 3.01x 3092.83 9313.43
chroma_hpp[16x16] 3.11x 11812.58 36774.01
chroma_hpp[ 8x32] 3.00x 12211.42 36602.97
chroma_hpp[32x32] 3.45x 46773.89 161223.02
chroma_hpp[16x64] 3.10x 47342.81 146912.95
chroma_hpp[ 8x12] 3.04x 4612.09 14000.78
chroma_hpp[ 6x16] 2.49x 5572.52 13871.37
chroma_hpp[ 8x4] 2.93x 1572.50 4612.88
chroma_hpp[16x24] 3.08x 17693.42 54546.35
chroma_hpp[12x32] 2.89x 19018.60 54936.23
chroma_hpp[ 16x8] 3.10x 5935.00 18377.62
chroma_hpp[32x48] 3.43x 70290.28 241380.27
chroma_hpp[24x64] 3.45x 70691.30 244043.80
chroma_hpp[32x16] 3.45x 23336.01 80519.96
chroma_hpp[ 8x64] 3.00x 24447.52 73434.10
chroma_hpp[ 8x8] 3.01x 3090.03 9312.50
chroma_hpp[16x16] 3.12x 11815.11 36889.00
chroma_hpp[32x32] 3.45x 46777.54 161214.95
chroma_hpp[64x64] 3.26x 195004.23 635334.94
chroma_hpp[ 8x4] 2.94x 1569.99 4612.84
chroma_hpp[ 16x8] 3.10x 5935.25 18378.44
chroma_hpp[ 8x16] 3.02x 6132.50 18520.00
chroma_hpp[32x16] 3.45x 23333.03 80500.84
chroma_hpp[16x32] 3.11x 23575.12 73354.38
chroma_hpp[64x32] 3.44x 93016.68 319740.38
chroma_hpp[32x64] 3.43x 93765.36 321706.41
chroma_hpp[16x12] 3.10x 8875.39 27545.59
chroma_hpp[12x16] 2.88x 9545.72 27476.17
chroma_hpp[ 16x4] 3.05x 2995.00 9139.99
chroma_hpp[32x24] 3.44x 35117.75 120899.98
chroma_hpp[24x32] 3.47x 35270.21 122474.86
chroma_hpp[ 32x8] 3.48x 11695.03 40736.42
chroma_hpp[ 8x32] 3.00x 12211.35 36602.55
chroma_hpp[64x48] 3.40x 140230.58 477478.03
chroma_hpp[48x64] 3.36x 142474.17 478603.38
chroma_hpp[64x16] 2.17x 73541.03 159600.50
chroma_hpp[16x64] 3.18x 47272.91 150339.42
32-bit
./test/TestBench --testbench interp | grep hpp
chroma_hpp[ 8x8] 2.96x 3164.99 9352.63
chroma_hpp[16x16] 3.09x 11885.01 36676.23
chroma_hpp[32x32] 3.47x 46802.81 162473.30
chroma_hpp[ 8x4] 2.86x 1645.03 4704.06
chroma_hpp[ 16x8] 3.06x 6005.04 18378.57
chroma_hpp[ 8x16] 2.97x 6212.50 18430.57
chroma_hpp[32x16] 3.47x 23405.02 81117.30
chroma_hpp[16x32] 3.09x 23645.19 73064.45
chroma_hpp[ 8x6] 2.89x 2405.00 6942.87
chroma_hpp[ 6x8] 2.46x 2905.00 7155.64
chroma_hpp[ 8x2] 2.69x 885.00 2379.88
chroma_hpp[16x12] 3.07x 8945.03 27458.76
chroma_hpp[12x16] 2.89x 9607.83 27761.21
chroma_hpp[ 16x4] 3.01x 3065.00 9231.61
chroma_hpp[32x24] 3.45x 35195.53 121283.95
chroma_hpp[24x32] 3.54x 35269.86 124809.67
chroma_hpp[ 32x8] 3.50x 11765.07 41123.75
chroma_hpp[ 8x32] 2.99x 12285.04 36677.12
chroma_hpp[ 8x16] 2.97x 6212.50 18430.08
chroma_hpp[16x32] 3.08x 23714.90 73078.69
chroma_hpp[32x64] 3.48x 93567.27 325623.00
chroma_hpp[ 8x8] 2.95x 3165.46 9352.40
chroma_hpp[16x16] 3.09x 11885.09 36737.30
chroma_hpp[ 8x32] 2.96x 12285.27 36415.00
chroma_hpp[32x32] 3.49x 46867.79 163765.89
chroma_hpp[16x64] 3.08x 47237.70 145644.56
chroma_hpp[ 8x12] 2.98x 4685.07 13965.77
chroma_hpp[ 6x16] 2.47x 5665.01 13973.91
chroma_hpp[ 8x4] 2.86x 1645.01 4703.00
chroma_hpp[16x24] 3.06x 17765.03 54399.01
chroma_hpp[12x32] 2.88x 19078.06 54941.30
chroma_hpp[ 16x8] 3.05x 6006.28 18330.82
chroma_hpp[32x48] 3.47x 70182.23 243806.36
chroma_hpp[24x64] 3.52x 70337.70 247270.98
chroma_hpp[32x16] 3.47x 23405.88 81119.58
chroma_hpp[ 8x64] 2.97x 24510.83 72856.95
chroma_hpp[ 8x8] 2.95x 3165.02 9352.55
chroma_hpp[16x16] 3.06x 11885.03 36391.55
chroma_hpp[32x32] 3.48x 46826.54 162810.08
chroma_hpp[64x64] 3.27x 194595.14 636533.44
chroma_hpp[ 8x4] 2.86x 1644.97 4703.04
chroma_hpp[ 16x8] 3.05x 6004.99 18331.19
chroma_hpp[ 8x16] 2.97x 6212.49 18430.20
chroma_hpp[32x16] 3.47x 23405.07 81204.01
chroma_hpp[16x32] 3.08x 23645.21 72742.43
chroma_hpp[64x32] 3.45x 93687.98 323369.06
chroma_hpp[32x64] 3.47x 94019.62 325988.94
chroma_hpp[16x12] 3.06x 8945.09 27410.79
chroma_hpp[12x16] 2.86x 9605.17 27448.21
chroma_hpp[ 16x4] 3.01x 3065.00 9231.73
chroma_hpp[32x24] 3.46x 35197.44 121608.38
chroma_hpp[24x32] 3.75x 35264.05 132129.23
chroma_hpp[ 32x8] 3.47x 11765.57 40848.62
chroma_hpp[ 8x32] 2.96x 12285.88 36418.82
chroma_hpp[64x48] 3.44x 140957.80 484415.38
chroma_hpp[48x64] 3.45x 139828.09 482536.94
chroma_hpp[64x16] 3.45x 46615.57 160827.50
chroma_hpp[16x64] 3.07x 47526.13 145913.83
diff -r c135c117ffb0 -r defd1cf26749 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Apr 21 13:42:36 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp Tue Apr 21 18:12:40 2015 -0700
@@ -1407,18 +1407,9 @@
}
if (cpuMask & X265_CPU_SSE3)
{
- p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].filter_hpp = x265_interp_4tap_horiz_pp_2x4_sse3;
- p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].filter_hpp = x265_interp_4tap_horiz_pp_2x8_sse3;
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_hpp = x265_interp_4tap_horiz_pp_4x2_sse3;
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_sse3;
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].filter_hpp = x265_interp_4tap_horiz_pp_4x8_sse3;
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].filter_hpp = x265_interp_4tap_horiz_pp_4x16_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].filter_hpp = x265_interp_4tap_horiz_pp_2x8_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_hpp = x265_interp_4tap_horiz_pp_2x16_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_hpp = x265_interp_4tap_horiz_pp_4x8_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_hpp = x265_interp_4tap_horiz_pp_4x16_sse3;
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_hpp = x265_interp_4tap_horiz_pp_4x32_sse3;
+ ALL_CHROMA_420_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
+ ALL_CHROMA_422_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
+ ALL_CHROMA_444_PU(filter_hpp, interp_4tap_horiz_pp, sse3);
}
if (cpuMask & X265_CPU_SSSE3)
{
diff -r c135c117ffb0 -r defd1cf26749 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Tue Apr 21 13:42:36 2015 -0500
+++ b/source/common/x86/ipfilter8.asm Tue Apr 21 18:12:40 2015 -0700
@@ -578,6 +578,285 @@
RET
+%macro FILTER_H4_w6_sse2 0 ; 4-tap horizontal filter of one row: reads around srcq, writes 6 pixels to dstq
+ pxor m4, m4 ; m4 = 0 for byte->word widening; NOTE(review): the IPFILTER_CHROMA_sse3 prologue also zeroes m4
+ movh m0, [srcq - 1] ; 8 bytes starting at src[-1]
+ movh m5, [srcq] ; 8 bytes starting at src[0]
+ punpckldq m0, m5 ; dwords: src[-1..2], src[0..3], src[3..6], src[4..7]
+ movhlps m2, m0 ; m2 low half = src[3..6], src[4..7] (inputs for outputs 4 and 5)
+ punpcklbw m0, m4 ; widen inputs for outputs 0 and 1 to words
+ punpcklbw m2, m4 ; widen inputs for outputs 4 and 5 to words
+ movd m1, [srcq + 1] ; src[1..4]
+ movd m5, [srcq + 2] ; src[2..5]
+ punpckldq m1, m5 ; inputs for outputs 2 and 3
+ punpcklbw m1, m4 ; widen to words
+ pmaddwd m0, m6 ; multiply by the words in m6, add adjacent pairs: two partial sums per output
+ pmaddwd m1, m6
+ pmaddwd m2, m6
+ packssdw m0, m1 ; partial sums for outputs 0-3 as words
+ packssdw m2, m2 ; partial sums for outputs 4-5 as words (same data in both halves)
+ pshuflw m1, m0, q2301 ; copy with the two words of each dword swapped (low half)
+ pshufhw m1, m1, q2301 ; same for the high half
+ pshuflw m3, m2, q2301 ; only the low half of m2 is needed
+ paddw m0, m1 ; both words of each dword now hold the full 4-tap sum
+ paddw m2, m3
+ psrld m0, 16 ; move the upper word of each dword down, zero-filling above it
+ psrld m2, 16
+ packssdw m0, m2 ; words 0-5 = sums for outputs 0-5 (words 6-7 are not stored)
+ paddw m0, m7 ; add m7 (loaded from pw_32 by the caller; presumably the rounding term)
+ psraw m0, 6 ; arithmetic shift right by 6
+ packuswb m0, m0 ; saturate to unsigned bytes
+ movd [dstq], m0 ; store pixels 0-3
+ pextrw r4d, m0, 2 ; word 2 = pixels 4-5 (overwrites r4)
+ mov [dstq + 4], r4w ; store pixels 4-5
+%endmacro
+
+%macro FILH4W8_sse2 1 ; 4-tap filter for 8 pixels at byte offset %1; expects m4 = 0; leaves 8 words in m0 (not yet packed to bytes)
+ movh m0, [srcq - 1 + %1] ; 8 bytes starting one byte before the first output position
+ movh m5, [srcq + %1] ; 8 bytes starting at the first output position
+ punpckldq m0, m5 ; dwords: inputs for outputs 0, 1, 4, 5
+ movhlps m2, m0 ; m2 low half = inputs for outputs 4 and 5
+ punpcklbw m0, m4 ; widen inputs for outputs 0 and 1 to words
+ punpcklbw m2, m4 ; widen inputs for outputs 4 and 5 to words
+ movh m1, [srcq + 1 + %1]
+ movh m5, [srcq + 2 + %1]
+ punpckldq m1, m5 ; dwords: inputs for outputs 2, 3, 6, 7
+ movhlps m3, m1 ; m3 low half = inputs for outputs 6 and 7
+ punpcklbw m1, m4 ; widen inputs for outputs 2 and 3 to words
+ punpcklbw m3, m4 ; widen inputs for outputs 6 and 7 to words
+ pmaddwd m0, m6 ; multiply by the words in m6, add adjacent pairs: two partial sums per output
+ pmaddwd m1, m6
+ pmaddwd m2, m6
+ pmaddwd m3, m6
+ packssdw m0, m1 ; partial sums for outputs 0-3 as words
+ packssdw m2, m3 ; partial sums for outputs 4-7 as words
+ pshuflw m1, m0, q2301 ; copy with the two words of each dword swapped
+ pshufhw m1, m1, q2301
+ pshuflw m3, m2, q2301
+ pshufhw m3, m3, q2301
+ paddw m0, m1 ; both words of each dword now hold the full 4-tap sum
+ paddw m2, m3
+ psrld m0, 16 ; move the upper word of each dword down, zero-filling above it
+ psrld m2, 16
+ packssdw m0, m2 ; words 0-7 = sums for outputs 0-7
+ paddw m0, m7 ; add m7 (loaded from pw_32 by the caller; presumably the rounding term)
+ psraw m0, 6 ; arithmetic shift right by 6
+%endmacro
+
+%macro FILTER_H4_w8_sse2 0 ; filter one row of 8 pixels and store it at dstq
+ FILH4W8_sse2 0 ; m0 = 8 filtered words for pixels 0-7
+ packuswb m0, m0 ; saturate to unsigned bytes
+ movh [dstq], m0 ; store 8 pixels
+%endmacro
+
+%macro FILTER_H4_w12_sse2 0 ; filter one row of 12 pixels: 8 via FILH4W8_sse2 plus 4 computed inline
+ FILH4W8_sse2 0 ; m0 = 8 filtered words for pixels 0-7
+ movd m1, [srcq - 1 + 8] ; src[7..10]
+ movd m3, [srcq + 8] ; src[8..11]
+ punpckldq m1, m3 ; inputs for outputs 8 and 9
+ punpcklbw m1, m4 ; widen to words (m4 = 0)
+ movd m2, [srcq + 1 + 8] ; src[9..12]
+ movd m3, [srcq + 2 + 8] ; src[10..13]
+ punpckldq m2, m3 ; inputs for outputs 10 and 11
+ punpcklbw m2, m4 ; widen to words
+ pmaddwd m1, m6 ; two partial sums per output
+ pmaddwd m2, m6
+ packssdw m1, m2 ; partial sums for outputs 8-11 as words
+ pshuflw m2, m1, q2301 ; copy with the two words of each dword swapped
+ pshufhw m2, m2, q2301
+ paddw m1, m2 ; both words of each dword now hold the full 4-tap sum
+ psrld m1, 16 ; move the upper word of each dword down, zero-filling above it
+ packssdw m1, m1 ; sums for outputs 8-11 as words (same data in both halves)
+ paddw m1, m7 ; add m7 (loaded from pw_32 by the caller; presumably the rounding term)
+ psraw m1, 6 ; arithmetic shift right by 6
+ packuswb m0, m1 ; bytes 0-7 = pixels 0-7, bytes 8-11 = pixels 8-11
+ movh [dstq], m0 ; store pixels 0-7
+ psrldq m0, 8 ; bring pixels 8-11 down to the low dword
+ movd [dstq + 8], m0 ; store pixels 8-11
+%endmacro
+
+%macro FILTER_H4_w16_sse2 0 ; filter one row of 16 pixels as two 8-pixel groups
+ FILH4W8_sse2 0 ; m0 = 8 filtered words for pixels 0-7
+ packuswb m0, m0 ; saturate to unsigned bytes
+ movh [dstq], m0
+ FILH4W8_sse2 8 ; pixels 8-15
+ packuswb m0, m0
+ movh [dstq + 8], m0
+%endmacro
+
+%macro FILTER_H4_w24_sse2 0 ; filter one row of 24 pixels as three 8-pixel groups
+ FILH4W8_sse2 0 ; m0 = 8 filtered words for pixels 0-7
+ packuswb m0, m0 ; saturate to unsigned bytes
+ movh [dstq], m0
+ FILH4W8_sse2 8 ; pixels 8-15
+ packuswb m0, m0
+ movh [dstq + 8], m0
+ FILH4W8_sse2 16 ; pixels 16-23
+ packuswb m0, m0
+ movh [dstq + 16], m0
+%endmacro
+
+%macro FILTER_H4_w32_sse2 0 ; filter one row of 32 pixels as four 8-pixel groups
+ FILH4W8_sse2 0 ; m0 = 8 filtered words for pixels 0-7
+ packuswb m0, m0 ; saturate to unsigned bytes
+ movh [dstq], m0
+ FILH4W8_sse2 8 ; pixels 8-15
+ packuswb m0, m0
+ movh [dstq + 8], m0
+ FILH4W8_sse2 16 ; pixels 16-23
+ packuswb m0, m0
+ movh [dstq + 16], m0
+ FILH4W8_sse2 24 ; pixels 24-31
+ packuswb m0, m0
+ movh [dstq + 24], m0
+%endmacro
+
+%macro FILTER_H4_w48_sse2 0 ; filter one row of 48 pixels as six 8-pixel groups
+ FILH4W8_sse2 0 ; m0 = 8 filtered words for pixels 0-7
+ packuswb m0, m0 ; saturate to unsigned bytes
+ movh [dstq], m0
+ FILH4W8_sse2 8 ; pixels 8-15
+ packuswb m0, m0
+ movh [dstq + 8], m0
+ FILH4W8_sse2 16 ; pixels 16-23
+ packuswb m0, m0
+ movh [dstq + 16], m0
+ FILH4W8_sse2 24 ; pixels 24-31
+ packuswb m0, m0
+ movh [dstq + 24], m0
+ FILH4W8_sse2 32 ; pixels 32-39
+ packuswb m0, m0
+ movh [dstq + 32], m0
+ FILH4W8_sse2 40 ; pixels 40-47
+ packuswb m0, m0
+ movh [dstq + 40], m0
+%endmacro
+
+%macro FILTER_H4_w64_sse2 0 ; filter one row of 64 pixels as eight 8-pixel groups
+ FILH4W8_sse2 0 ; m0 = 8 filtered words for pixels 0-7
+ packuswb m0, m0 ; saturate to unsigned bytes
+ movh [dstq], m0
+ FILH4W8_sse2 8 ; pixels 8-15
+ packuswb m0, m0
+ movh [dstq + 8], m0
+ FILH4W8_sse2 16 ; pixels 16-23
+ packuswb m0, m0
+ movh [dstq + 16], m0
+ FILH4W8_sse2 24 ; pixels 24-31
+ packuswb m0, m0
+ movh [dstq + 24], m0
+ FILH4W8_sse2 32 ; pixels 32-39
+ packuswb m0, m0
+ movh [dstq + 32], m0
+ FILH4W8_sse2 40 ; pixels 40-47
+ packuswb m0, m0
+ movh [dstq + 40], m0
+ FILH4W8_sse2 48 ; pixels 48-55
+ packuswb m0, m0
+ movh [dstq + 48], m0
+ FILH4W8_sse2 56 ; pixels 56-63
+ packuswb m0, m0
+ movh [dstq + 56], m0
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void interp_4tap_horiz_pp_%1x%2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
+;-----------------------------------------------------------------------------
+%macro IPFILTER_CHROMA_sse3 2 ; %1 = block width (selects FILTER_H4_w%1_sse2), %2 = block height (row count)
+INIT_XMM sse3
+cglobal interp_4tap_horiz_pp_%1x%2, 4, 6, 8, src, srcstride, dst, dststride
+ mov r4d, r4m ; r4 = coeffIdx (fifth argument)
+ mova m7, [pw_32] ; constant added to each sum before the shift by 6 (presumably 32 per word)
+ pxor m4, m4 ; m4 = 0, used by the row macros to widen bytes to words
+
+%ifdef PIC
+ lea r5, [tabw_ChromaCoeff] ; table address via a register for position-independent code
+ movddup m6, [r5 + r4 * 8] ; m6 = 8-byte table entry for coeffIdx, duplicated into both halves
+%else
+ movddup m6, [tabw_ChromaCoeff + r4 * 8] ; m6 = 8-byte table entry for coeffIdx, duplicated into both halves
+%endif
+
+%assign x 1
+%rep %2 ; emit the row filter once per row (unrolled at assembly time)
+ FILTER_H4_w%1_sse2
+%if x < %2 ; skip the pointer advance after the last row
+ add srcq, srcstrideq
+ add dstq, dststrideq
+%endif
+%assign x x+1
+%endrep
+
+ RET
+
+%endmacro
+
+ IPFILTER_CHROMA_sse3 6, 8 ; instantiations for widths 6, 8 and 12
+ IPFILTER_CHROMA_sse3 8, 2
+ IPFILTER_CHROMA_sse3 8, 4
+ IPFILTER_CHROMA_sse3 8, 6
+ IPFILTER_CHROMA_sse3 8, 8
+ IPFILTER_CHROMA_sse3 8, 16
+ IPFILTER_CHROMA_sse3 8, 32
+ IPFILTER_CHROMA_sse3 12, 16
+
+ IPFILTER_CHROMA_sse3 6, 16
+ IPFILTER_CHROMA_sse3 8, 12
+ IPFILTER_CHROMA_sse3 8, 64
+ IPFILTER_CHROMA_sse3 12, 32
+
+;-----------------------------------------------------------------------------
+; void interp_4tap_horiz_pp_%1x%2(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
+;-----------------------------------------------------------------------------
+%macro IPFILTER_CHROMA_W_sse3 2 ; %1 = block width, %2 = block height; NOTE(review): body duplicates IPFILTER_CHROMA_sse3
+INIT_XMM sse3
+cglobal interp_4tap_horiz_pp_%1x%2, 4, 6, 8, src, srcstride, dst, dststride
+ mov r4d, r4m ; r4 = coeffIdx (fifth argument)
+ mova m7, [pw_32] ; constant added to each sum before the shift by 6 (presumably 32 per word)
+ pxor m4, m4 ; m4 = 0, used by the row macros to widen bytes to words
+%ifdef PIC
+ lea r5, [tabw_ChromaCoeff] ; table address via a register for position-independent code
+ movddup m6, [r5 + r4 * 8] ; m6 = 8-byte table entry for coeffIdx, duplicated into both halves
+%else
+ movddup m6, [tabw_ChromaCoeff + r4 * 8] ; m6 = 8-byte table entry for coeffIdx, duplicated into both halves
+%endif
+
+%assign x 1
+%rep %2 ; emit the row filter once per row (unrolled at assembly time)
+ FILTER_H4_w%1_sse2
+%if x < %2 ; skip the pointer advance after the last row
+ add srcq, srcstrideq
+ add dstq, dststrideq
+%endif
+%assign x x+1
+%endrep
+
+ RET
+
+%endmacro
+
+ IPFILTER_CHROMA_W_sse3 16, 4 ; instantiations for widths 16, 24, 32, 48 and 64
+ IPFILTER_CHROMA_W_sse3 16, 8
+ IPFILTER_CHROMA_W_sse3 16, 12
+ IPFILTER_CHROMA_W_sse3 16, 16
+ IPFILTER_CHROMA_W_sse3 16, 32
+ IPFILTER_CHROMA_W_sse3 32, 8
+ IPFILTER_CHROMA_W_sse3 32, 16
+ IPFILTER_CHROMA_W_sse3 32, 24
+ IPFILTER_CHROMA_W_sse3 24, 32
+ IPFILTER_CHROMA_W_sse3 32, 32
+
+ IPFILTER_CHROMA_W_sse3 16, 24
+ IPFILTER_CHROMA_W_sse3 16, 64
+ IPFILTER_CHROMA_W_sse3 32, 48
+ IPFILTER_CHROMA_W_sse3 24, 64
+ IPFILTER_CHROMA_W_sse3 32, 64
+
+ IPFILTER_CHROMA_W_sse3 64, 64
+ IPFILTER_CHROMA_W_sse3 64, 32
+ IPFILTER_CHROMA_W_sse3 64, 48
+ IPFILTER_CHROMA_W_sse3 48, 64
+ IPFILTER_CHROMA_W_sse3 64, 16
+
%macro FILTER_H4_w2_2 3
movh %2, [srcq - 1]
pshufb %2, %2, Tm0
diff -r c135c117ffb0 -r defd1cf26749 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h Tue Apr 21 13:42:36 2015 -0500
+++ b/source/common/x86/ipfilter8.h Tue Apr 21 18:12:40 2015 -0700
@@ -814,6 +814,38 @@
void x265_interp_4tap_horiz_pp_4x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_4tap_horiz_pp_4x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
void x265_interp_4tap_horiz_pp_4x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_6x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_6x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x2_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x4_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x6_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x12_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_12x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_12x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x4_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x12_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x24_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_24x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_24x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x8_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x24_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_48x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x16_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x32_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x48_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x64_sse3(const pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
#undef LUMA_FILTERS
#undef LUMA_SP_FILTERS
#undef LUMA_SS_FILTERS
More information about the x265-devel
mailing list