<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Wed, Oct 9, 2013 at 8:54 AM,  <span dir="ltr"><<a href="mailto:praveen@multicorewareinc.com" target="_blank">praveen@multicorewareinc.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex"># HG changeset patch<br>
# User Praveen Tiwari<br>
# Date 1381326812 -19800<br>
# Node ID 37b42347a5baefe11822888d385e4c8422f4f3f3<br>
# Parent  fc7fbdd18bc0d6d7f98180332e065d83c054fe02<br>
Chroma function, partion based call<br>
<br>
diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/ipfilter.cpp<br>
--- a/source/common/ipfilter.cpp        Wed Oct 09 00:00:10 2013 -0500<br>
+++ b/source/common/ipfilter.cpp        Wed Oct 09 19:23:32 2013 +0530<br>
@@ -34,6 +34,56 @@<br>
 #pragma warning(disable: 4127) // conditional expression is constant, typical for templated functions<br>
 #endif<br>
<br>
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX, WIDTH) \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  = FUNC_PREFIX<4,  WIDTH,  4>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>;<br>
+<br>
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX, WIDTH) \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2]  = FUNC_PREFIX<4,  WIDTH,  2>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  = FUNC_PREFIX<4,  WIDTH,  4>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  = FUNC_PREFIX<4,  WIDTH,  16>;<br>
+<br>
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX, WIDTH) \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>;<br>
+<br>
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX, WIDTH) \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2]  = FUNC_PREFIX<4,  WIDTH,  2>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  = FUNC_PREFIX<4,  WIDTH,  4>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x6]  = FUNC_PREFIX<4,  WIDTH,  6>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  = FUNC_PREFIX<4,  WIDTH,  16>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  = FUNC_PREFIX<4,  WIDTH,  32>;<br>
+<br>
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX, WIDTH) \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  = FUNC_PREFIX<4,  WIDTH,  16>;<br>
+<br>
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX, WIDTH) \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  = FUNC_PREFIX<4,  WIDTH,  4>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x12]  = FUNC_PREFIX<4,  WIDTH,  12>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  = FUNC_PREFIX<4,  WIDTH,  16>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  = FUNC_PREFIX<4,  WIDTH,  32>;<br>
+<br>
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX, WIDTH) \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  = FUNC_PREFIX<4,  WIDTH,  32>;<br>
+<br>
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX, WIDTH) \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  = FUNC_PREFIX<4,  WIDTH,  8>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  = FUNC_PREFIX<4,  WIDTH,  16>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x24]  = FUNC_PREFIX<4,  WIDTH,  24>; \<br>
+    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  = FUNC_PREFIX<4,  WIDTH,  32>;<br>
+<br>
+#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(FUNC_PREFIX) \<br>
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX,  2) \<br>
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX,  4) \<br>
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX,  6) \<br>
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX,  8) \<br>
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX, 12) \<br>
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX, 16) \<br>
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX, 24) \<br>
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX, 32) \<br>
+<br></blockquote><div><br></div><div>Passing width as an argument to a macro with the width in the name seems rather redundant.  I think it would be more clear if you skipped the intermediate macros and just listed them all in this single macro.</div>
<div><br></div><div>FUNC_PREFIX also seems odd for a macro with CHROMA_HORIZONTAL_PP in the name</div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">

 namespace {<br>
 template<int N><br>
 void filterVertical_s_p(short *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, short const *coeff)<br>
@@ -88,8 +138,8 @@<br>
     }<br>
 }<br>
<br>
-template<int N><br>
-void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, short const *coeff)<br>
+template<int N, int width, int height><br>
+void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, short const *coeff)<br>
 {<br></blockquote><div><br></div><div>the original C primitive that takes width/height is still needed for the odd block sizes used during subpel refine.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">

     int cStride = 1;<br>
<br>
@@ -500,11 +550,13 @@<br>
<br>
 void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)<br>
 {<br>
+<br>
+    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(filterHorizontal_p_p)<br>
+<br>
     p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>;<br>
     p.ipfilter_ps[FILTER_H_P_S_8] = filterHorizontal_p_s<8>;<br>
     p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_p_s<8>;<br>
     p.ipfilter_sp[FILTER_V_S_P_8] = filterVertical_s_p<8>;<br>
-    p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>;<br>
     p.ipfilter_ps[FILTER_H_P_S_4] = filterHorizontal_p_s<4>;<br>
     p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_p_s<4>;<br>
     p.ipfilter_sp[FILTER_V_S_P_4] = filterVertical_s_p<4>;<br>
diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/primitives.h<br>
--- a/source/common/primitives.h        Wed Oct 09 00:00:10 2013 -0500<br>
+++ b/source/common/primitives.h        Wed Oct 09 19:23:32 2013 +0530<br>
@@ -89,6 +89,17 @@<br>
     NUM_PARTITIONS<br>
 };<br>
<br>
+enum ChromaPartions<br>
+{<br>
+    CHROMA_HORIZONTAL_PP_PARTITION_2x4,  CHROMA_HORIZONTAL_PP_PARTITION_2x8,  CHROMA_HORIZONTAL_PP_PARTITION_4x2,  CHROMA_HORIZONTAL_PP_PARTITION_4x4,<br>
+    CHROMA_HORIZONTAL_PP_PARTITION_4x8,  CHROMA_HORIZONTAL_PP_PARTITION_4x16,  CHROMA_HORIZONTAL_PP_PARTITION_8x2,  CHROMA_HORIZONTAL_PP_PARTITION_8x4,<br>
+    CHROMA_HORIZONTAL_PP_PARTITION_8x6,  CHROMA_HORIZONTAL_PP_PARTITION_8x8,  CHROMA_HORIZONTAL_PP_PARTITION_8x16,  CHROMA_HORIZONTAL_PP_PARTITION_8x32,<br>
+    CHROMA_HORIZONTAL_PP_PARTITION_6x8,  CHROMA_HORIZONTAL_PP_PARTITION_12x16,  CHROMA_HORIZONTAL_PP_PARTITION_16x4,  CHROMA_HORIZONTAL_PP_PARTITION_16x8,<br>
+    CHROMA_HORIZONTAL_PP_PARTITION_16x12,  CHROMA_HORIZONTAL_PP_PARTITION_16x16,  CHROMA_HORIZONTAL_PP_PARTITION_16x32,  CHROMA_HORIZONTAL_PP_PARTITION_24x32,<br>
+    CHROMA_HORIZONTAL_PP_PARTITION_32x8,  CHROMA_HORIZONTAL_PP_PARTITION_32x16,  CHROMA_HORIZONTAL_PP_PARTITION_32x24,  CHROMA_HORIZONTAL_PP_PARTITION_32x32,<br>
+    NUM_CHROMA_HORIZONTAL_PP_PARTITIONS<br>
+};<br></blockquote><div><br></div><div>the chroma partition table needs to be shareable between all the chroma interpolation routines; these names are too specific (drop HORIZONTAL_PP).</div><div><br></div><div>This table needs to line up with the luma partition table, so you can take a luma part index and use it directly in the chroma function tables to get the appropriately 4:2:0 down-scaled chroma partition.  ..or.. you need to make a mapping table to get from luma part idx to chroma part idx.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+<br>
 enum SquareBlocks   // Routines can be indexed using log2n(width)<br>
 {<br>
     BLOCK_4x4,<br>
@@ -205,6 +216,8 @@<br>
 typedef void (*ssim_4x4x2_core_t)(const pixel *pix1, intptr_t stride1, const pixel *pix2, intptr_t stride2, ssim_t sums[2][4]);<br>
 typedef float (*ssim_end4_t)(ssim_t sum0[5][4], ssim_t sum1[5][4], int width);<br>
<br>
+typedef void (*chromaFilterHoriz_pp)(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, const short *coeff);   // Modified argument list for chroma filter, removed width and height.<br></blockquote><div><br>
</div><div>This same funcdef could be used for horizontal and vertical and luma, so it's best to leave Chroma and Horiz from the name.  Also, funcdefs should end in _t like the rest in this file.</div><div><br></div><div>
Did we decide to put the coefficient tables into the assembly as byte arrays?  if so, the last argument should be int coeffIdx (0..3)</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">

+<br>
 /* Define a structure containing function pointers to optimized encoder<br>
  * primitives.  Each pointer can reference either an assembly routine,<br>
  * a vectorized primitive, or a C function. */<br>
@@ -265,6 +278,8 @@<br>
     downscale_t     frame_init_lowres_core;<br>
     ssim_4x4x2_core_t ssim_4x4x2_core;<br>
     ssim_end4_t       ssim_end_4;<br>
+<br>
+    chromaFilterHoriz_pp  filterHorizontal_p_p[NUM_CHROMA_HORIZONTAL_PP_PARTITIONS];<br></blockquote><div><br></div><div>taking everything above into account, this would be something like:</div><div><br></div><div>   filter_pp_t luma_hpp[NUM_PARTITIONS];<br>
</div><div>   filter_pp_t luma_vpp[NUM_PARTITIONS];</div><div>   filter_pp_t chroma_hpp[NUM_CHROMA_PARTITIONS];</div><div>   filter_pp_t chroma_vpp[NUM_CHROMA_PARTITIONS];</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">

 };<br>
<br>
 /* This copy of the table is what gets used by the encoder.<br>
diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/vec/ipfilter-sse41.cpp<br>
--- a/source/common/vec/ipfilter-sse41.cpp      Wed Oct 09 00:00:10 2013 -0500<br>
+++ b/source/common/vec/ipfilter-sse41.cpp      Wed Oct 09 19:23:32 2013 +0530<br>
@@ -1541,8 +1541,8 @@<br>
     -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0<br>
 };<br>
<br>
-template<int N><br>
-void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, short const *coeff)<br>
+template<int N, int width, int height><br>
+void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, short const *coeff)<br>
 {<br>
     assert(X265_DEPTH == 8);<br>
<br>
@@ -1656,9 +1656,35 @@<br>
 }<br>
<br>
 namespace x265 {<br>
+#define SETUP_PARTITION(W, H) \<br>
+    p.filterHorizontal_p_p[CHROMA_HORIZONTAL_PP_PARTITION_##W##x##H] = filterHorizontal_p_p##<4,  W,  H>;<br>
+<br>
 void Setup_Vec_IPFilterPrimitives_sse41(EncoderPrimitives& p)<br>
 {<br>
-    p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>;<br>
+        SETUP_PARTITION(2,  4);<br>
+        SETUP_PARTITION(2,  8);<br>
+        SETUP_PARTITION(4,  2);<br>
+        SETUP_PARTITION(4,  4);<br>
+        SETUP_PARTITION(4,  8);<br>
+        SETUP_PARTITION(4,  16);<br>
+        SETUP_PARTITION(6,  8);<br>
+        SETUP_PARTITION(8,  2);<br>
+        SETUP_PARTITION(8,  4);<br>
+        SETUP_PARTITION(8,  6);<br>
+        SETUP_PARTITION(8,  8);<br>
+        SETUP_PARTITION(8,  16);<br>
+        SETUP_PARTITION(12,  16);<br>
+        SETUP_PARTITION(16,  4);<br>
+        SETUP_PARTITION(16,  8);<br>
+        SETUP_PARTITION(16,  12);<br>
+        SETUP_PARTITION(16,  16);<br>
+        SETUP_PARTITION(16,  32);<br>
+        SETUP_PARTITION(24,  32);<br>
+        SETUP_PARTITION(32,  8);<br>
+        SETUP_PARTITION(32,  16);<br>
+        SETUP_PARTITION(32,  24);<br>
+        SETUP_PARTITION(32,  32);<br>
+<br>
     p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>;<br>
<br>
     p.ipfilter_pp[FILTER_V_P_P_4] = filterVertical_p_p<4>;<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>