[x265] [PATCH] wrapper code for interpolate HV

Min Chen chenm003 at 163.com
Wed Jan 7 07:28:51 CET 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1420611644 -28800
# Node ID 4c4a4f290bd759fd5c496f8e551fcd653940d3b0
# Parent  357ec738fb0ccaa678ab548629666b118f9f938f
wrapper code for interpolate HV
---
 source/common/x86/asm-primitives.cpp |   53 ++++++++++++++++++++++++++++++++--
 source/common/x86/ipfilter8.asm      |    2 +-
 source/common/x86/ipfilter8.h        |    2 +-
 3 files changed, 52 insertions(+), 5 deletions(-)

diff -r 357ec738fb0c -r 4c4a4f290bd7 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Jan 06 15:39:58 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Jan 07 14:20:44 2015 +0800
@@ -39,6 +39,49 @@
 #include "dct8.h"
 }
 
+
+#define INTERP_8tap_HV_PP(W, H, cpu) /* defines interp_8tap_hv_pp_<W>x<H><cpu>: horizontal 8-tap pass into a 16-bit temporary, then vertical 8-tap pass into dst */ \
+    void interp_8tap_hv_pp_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY) \
+{ \
+    ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA)]); /* intermediate buffer: row stride MAX_CU_SIZE, sized for MAX_CU_SIZE + NTAPS_LUMA rows */ \
+    const int filterSize = NTAPS_LUMA; \
+    const int halfFilterSize = filterSize >> 1; \
+    /* horizontal pass: src -> immed, selected by idxX; the final argument 1 presumably requests the extra rows the vertical taps need -- confirm against the horiz_ps asm */ \
+    x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu(src, srcStride, immed, MAX_CU_SIZE, idxX, 1); \
+    /* vertical pass: starts (halfFilterSize - 1) rows into immed, selected by idxY, writes pixels to dst */ \
+    x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu(immed + (halfFilterSize - 1) * MAX_CU_SIZE, MAX_CU_SIZE, dst, dstStride, idxY); \
+}
+
+#define LUMA_HV_PP_FILTERS(cpu) /* expands INTERP_8tap_HV_PP once per luma block size listed below */ \
+    INTERP_8tap_HV_PP(4,   4, cpu); \
+    INTERP_8tap_HV_PP(8,   8, cpu); \
+    INTERP_8tap_HV_PP(8,   4, cpu); \
+    INTERP_8tap_HV_PP(4,   8, cpu); \
+    INTERP_8tap_HV_PP(16, 16, cpu); \
+    INTERP_8tap_HV_PP(16,  8, cpu); \
+    INTERP_8tap_HV_PP(8,  16, cpu); \
+    INTERP_8tap_HV_PP(16, 12, cpu); \
+    INTERP_8tap_HV_PP(12, 16, cpu); \
+    INTERP_8tap_HV_PP(16,  4, cpu); \
+    INTERP_8tap_HV_PP(4,  16, cpu); \
+    INTERP_8tap_HV_PP(32, 32, cpu); \
+    INTERP_8tap_HV_PP(32, 16, cpu); \
+    INTERP_8tap_HV_PP(16, 32, cpu); \
+    INTERP_8tap_HV_PP(32, 24, cpu); \
+    INTERP_8tap_HV_PP(24, 32, cpu); \
+    INTERP_8tap_HV_PP(32,  8, cpu); \
+    INTERP_8tap_HV_PP(8,  32, cpu); \
+    INTERP_8tap_HV_PP(64, 64, cpu); \
+    INTERP_8tap_HV_PP(64, 32, cpu); \
+    INTERP_8tap_HV_PP(32, 64, cpu); \
+    INTERP_8tap_HV_PP(64, 48, cpu); \
+    INTERP_8tap_HV_PP(48, 64, cpu); \
+    INTERP_8tap_HV_PP(64, 16, cpu); \
+    INTERP_8tap_HV_PP(16, 64, cpu);
+// Define the interp_8tap_hv_pp_<W>x<H>_sse4 wrappers, then drop both generator macros so they do not leak further into the file.
+LUMA_HV_PP_FILTERS(_sse4)
+
+#undef LUMA_HV_PP_FILTERS
+#undef INTERP_8tap_HV_PP
+
 #define INIT2_NAME(name1, name2, cpu) \
     p.name1[LUMA_16x16] = x265_pixel_ ## name2 ## _16x16 ## cpu; \
     p.name1[LUMA_16x8]  = x265_pixel_ ## name2 ## _16x8 ## cpu;
@@ -531,13 +574,16 @@
     p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
     p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
     p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
-    p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu;
+    p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
+    p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_8tap_hv_pp_ ## W ## x ## H ## _c;
 #else
 #define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
     p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
     p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
     p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
-    p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu;
+    p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
+    p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
+    p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_8tap_hv_pp_ ## W ## x ## H ## cpu;
 #endif // if HIGH_BIT_DEPTH
 
 #define SETUP_LUMA_SUB_FUNC_DEF(W, H, cpu) \
@@ -1636,7 +1682,6 @@
         p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_ssse3;
         p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_ssse3;
 
-        p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3;
         p.luma_p2s = x265_luma_p2s_ssse3;
         p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_ssse3;
         p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_ssse3;
@@ -1692,6 +1737,9 @@
         LUMA_FILTERS(_sse4);
         ASSGN_SSE_SS(sse4);
 
+        // MUST come after LUMA_FILTERS to overwrite the default version
+        p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_sse4;
+
         p.chroma[X265_CSP_I420].copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
         p.chroma[X265_CSP_I420].copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
         p.chroma[X265_CSP_I420].copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
diff -r 357ec738fb0c -r 4c4a4f290bd7 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Tue Jan 06 15:39:58 2015 +0530
+++ b/source/common/x86/ipfilter8.asm	Wed Jan 07 14:20:44 2015 +0800
@@ -1817,7 +1817,7 @@
 ;-----------------------------------------------------------------------------
 ; void interp_8tap_hv_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int idxX, int idxY)
 ;-----------------------------------------------------------------------------
-INIT_XMM ssse3
+INIT_XMM sse4
 cglobal interp_8tap_hv_pp_8x8, 4, 7, 8, 0-15*16
 %define coef        m7
 %define stk_buf     rsp
diff -r 357ec738fb0c -r 4c4a4f290bd7 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h	Tue Jan 06 15:39:58 2015 +0530
+++ b/source/common/x86/ipfilter8.h	Wed Jan 07 14:20:44 2015 +0800
@@ -618,7 +618,7 @@
 LUMA_SS_FILTERS(_sse2);
 LUMA_FILTERS(_avx2);
 
-void x265_interp_8tap_hv_pp_8x8_ssse3(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
+void x265_interp_8tap_hv_pp_8x8_sse4(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
 void x265_luma_p2s_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height);
 
 #undef LUMA_FILTERS



More information about the x265-devel mailing list