[x265] [PATCH REVIEW Only ] Chroma function, partion based call

praveen at multicorewareinc.com praveen at multicorewareinc.com
Wed Oct 9 15:54:28 CEST 2013


# HG changeset patch
# User Praveen Tiwari
# Date 1381326812 -19800
# Node ID 37b42347a5baefe11822888d385e4c8422f4f3f3
# Parent  fc7fbdd18bc0d6d7f98180332e065d83c054fe02
Chroma function, partion based call

diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp	Wed Oct 09 00:00:10 2013 -0500
+++ b/source/common/ipfilter.cpp	Wed Oct 09 19:23:32 2013 +0530
@@ -34,6 +34,56 @@
 #pragma warning(disable: 4127) // conditional expression is constant, typical for templated functions
 #endif
 
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX, WIDTH) \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  = FUNC_PREFIX<4,  WIDTH,  4>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>;
+
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX, WIDTH) \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2]  = FUNC_PREFIX<4,  WIDTH,  2>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  = FUNC_PREFIX<4,  WIDTH,  4>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  = FUNC_PREFIX<4,  WIDTH,  16>;
+
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX, WIDTH) \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>;
+
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX, WIDTH) \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2]  = FUNC_PREFIX<4,  WIDTH,  2>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  = FUNC_PREFIX<4,  WIDTH,  4>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x6]  = FUNC_PREFIX<4,  WIDTH,  6>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  = FUNC_PREFIX<4,  WIDTH,  16>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  = FUNC_PREFIX<4,  WIDTH,  32>;
+
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX, WIDTH) \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  = FUNC_PREFIX<4,  WIDTH,  16>;
+
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX, WIDTH) \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  = FUNC_PREFIX<4,  WIDTH,  4>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x12]  = FUNC_PREFIX<4,  WIDTH,  12>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  = FUNC_PREFIX<4,  WIDTH,  16>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  = FUNC_PREFIX<4,  WIDTH,  32>;
+
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX, WIDTH) \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  = FUNC_PREFIX<4,  WIDTH,  32>;
+
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX, WIDTH) \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  = FUNC_PREFIX<4,  WIDTH,  16>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x24]  = FUNC_PREFIX<4,  WIDTH,  24>; \
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  = FUNC_PREFIX<4,  WIDTH,  32>;
+
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(FUNC_PREFIX) \
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX,  2) \
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX,  4) \
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX,  6) \
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX,  8) \
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX, 12) \
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX, 16) \
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX, 24) \
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX, 32) \
+
 namespace {
 template<int N>
 void filterVertical_s_p(short *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, short const *coeff)
@@ -88,8 +138,8 @@
     }
 }
 
-template<int N>
-void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, short const *coeff)
+template<int N, int width, int height>
+void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, short const *coeff)
 {
     int cStride = 1;
 
@@ -500,11 +550,13 @@
 
 void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)
 {
+
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(filterHorizontal_p_p)
+
     p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>;
     p.ipfilter_ps[FILTER_H_P_S_8] = filterHorizontal_p_s<8>;
     p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_p_s<8>;
     p.ipfilter_sp[FILTER_V_S_P_8] = filterVertical_s_p<8>;
-    p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>;
     p.ipfilter_ps[FILTER_H_P_S_4] = filterHorizontal_p_s<4>;
     p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_p_s<4>;
     p.ipfilter_sp[FILTER_V_S_P_4] = filterVertical_s_p<4>;
diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/primitives.h
--- a/source/common/primitives.h	Wed Oct 09 00:00:10 2013 -0500
+++ b/source/common/primitives.h	Wed Oct 09 19:23:32 2013 +0530
@@ -89,6 +89,17 @@
     NUM_PARTITIONS
 };
 
+enum ChromaPartions
+{
+    CHROMA_HORIZONTAL_PP_PARTITION_2x4,  CHROMA_HORIZONTAL_PP_PARTITION_2x8,  CHROMA_HORIZONTAL_PP_PARTITION_4x2,  CHROMA_HORIZONTAL_PP_PARTITION_4x4,
+    CHROMA_HORIZONTAL_PP_PARTITION_4x8,  CHROMA_HORIZONTAL_PP_PARTITION_4x16,  CHROMA_HORIZONTAL_PP_PARTITION_8x2,  CHROMA_HORIZONTAL_PP_PARTITION_8x4,
+    CHROMA_HORIZONTAL_PP_PARTITION_8x6,  CHROMA_HORIZONTAL_PP_PARTITION_8x8,  CHROMA_HORIZONTAL_PP_PARTITION_8x16,  CHROMA_HORIZONTAL_PP_PARTITION_8x32,
+    CHROMA_HORIZONTAL_PP_PARTITION_6x8,  CHROMA_HORIZONTAL_PP_PARTITION_12x16,  CHROMA_HORIZONTAL_PP_PARTITION_16x4,  CHROMA_HORIZONTAL_PP_PARTITION_16x8,
+    CHROMA_HORIZONTAL_PP_PARTITION_16x12,  CHROMA_HORIZONTAL_PP_PARTITION_16x16,  CHROMA_HORIZONTAL_PP_PARTITION_16x32,  CHROMA_HORIZONTAL_PP_PARTITION_24x32,
+    CHROMA_HORIZONTAL_PP_PARTITION_32x8,  CHROMA_HORIZONTAL_PP_PARTITION_32x16,  CHROMA_HORIZONTAL_PP_PARTITION_32x24,  CHROMA_HORIZONTAL_PP_PARTITION_32x32,
+    NUM_CHROMA_HORIZONTAL_PP_PARTITIONS
+};
+
 enum SquareBlocks   // Routines can be indexed using log2n(width)
 {
     BLOCK_4x4,
@@ -205,6 +216,8 @@
 typedef void (*ssim_4x4x2_core_t)(const pixel *pix1, intptr_t stride1, const pixel *pix2, intptr_t stride2, ssim_t sums[2][4]);
 typedef float (*ssim_end4_t)(ssim_t sum0[5][4], ssim_t sum1[5][4], int width);
 
+typedef void (*chromaFilterHoriz_pp)(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, const short *coeff);   // Modified argument list for chroma filter, removed width and height.
+
 /* Define a structure containing function pointers to optimized encoder
  * primitives.  Each pointer can reference either an assembly routine,
  * a vectorized primitive, or a C function. */
@@ -265,6 +278,8 @@
     downscale_t     frame_init_lowres_core;
     ssim_4x4x2_core_t ssim_4x4x2_core;
     ssim_end4_t       ssim_end_4;
+
+    chromaFilterHoriz_pp  filterHorizontal_p_p[NUM_CHROMA_HORIZONTAL_PP_PARTITIONS];
 };
 
 /* This copy of the table is what gets used by the encoder.
diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/vec/ipfilter-sse41.cpp
--- a/source/common/vec/ipfilter-sse41.cpp	Wed Oct 09 00:00:10 2013 -0500
+++ b/source/common/vec/ipfilter-sse41.cpp	Wed Oct 09 19:23:32 2013 +0530
@@ -1541,8 +1541,8 @@
     -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
-template<int N>
-void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, short const *coeff)
+template<int N, int width, int height>
+void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, short const *coeff)
 {
     assert(X265_DEPTH == 8);
 
@@ -1656,9 +1656,35 @@
 }
 
 namespace x265 {
+#define SETUP_PARTITION(W, H) \
+    p.filterHorizontal_p_p[CHROMA_HORIZONTAL_PP_PARTITION_##W##x##H] = filterHorizontal_p_p##<4,  W,  H>;
+
 void Setup_Vec_IPFilterPrimitives_sse41(EncoderPrimitives& p)
 {
-    p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>;
+        SETUP_PARTITION(2,  4);
+        SETUP_PARTITION(2,  8);
+        SETUP_PARTITION(4,  2);
+        SETUP_PARTITION(4,  4);
+        SETUP_PARTITION(4,  8);
+        SETUP_PARTITION(4,  16);
+        SETUP_PARTITION(6,  8);
+        SETUP_PARTITION(8,  2);
+        SETUP_PARTITION(8,  4);
+        SETUP_PARTITION(8,  6);
+        SETUP_PARTITION(8,  8);
+        SETUP_PARTITION(8,  16);
+        SETUP_PARTITION(12,  16);
+        SETUP_PARTITION(16,  4);
+        SETUP_PARTITION(16,  8);
+        SETUP_PARTITION(16,  12);
+        SETUP_PARTITION(16,  16);
+        SETUP_PARTITION(16,  32);
+        SETUP_PARTITION(24,  32);
+        SETUP_PARTITION(32,  8);
+        SETUP_PARTITION(32,  16);
+        SETUP_PARTITION(32,  24);
+        SETUP_PARTITION(32,  32);
+
     p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>;
 
     p.ipfilter_pp[FILTER_V_P_P_4] = filterVertical_p_p<4>;


More information about the x265-devel mailing list