<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Fri, Jan 3, 2014 at 6:45 AM, <span dir="ltr"><<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User <a href="mailto:ashok@multicorewareinc.com">ashok@multicorewareinc.com</a><br>
# Date 1388753074 -19800<br>
# Fri Jan 03 18:14:34 2014 +0530<br>
# Node ID 019ad3c515b3219497dfa51bd8f8c3a709b7ec5d<br>
# Parent 8137881d4cad4555e1128320d62dd56dd24ed3dc<br>
Modify primitives to support multiple color space formats<br>
<br>
diff -r 8137881d4cad -r 019ad3c515b3 source/common/TShortYUV.h<br>
--- a/source/common/TShortYUV.h Thu Jan 02 16:18:35 2014 +0530<br>
+++ b/source/common/TShortYUV.h Fri Jan 03 18:14:34 2014 +0530<br>
@@ -87,9 +87,9 @@<br>
// Access starting position of YUV partition unit buffer<br>
int16_t* getLumaAddr(unsigned int partUnitIdx) { return m_bufY + getAddrOffset(partUnitIdx, m_width); }<br>
<br>
- int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb + (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }<br>
+ int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb + (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }<br>
<br>
- int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr + (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }<br>
+ int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr + (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }<br>
<br>
// Access starting position of YUV transform unit buffer<br>
int16_t* getLumaAddr(unsigned int partIdx, unsigned int size) { return m_bufY + getAddrOffset(partIdx, size, m_width); }<br>
diff -r 8137881d4cad -r 019ad3c515b3 source/common/ipfilter.cpp<br>
--- a/source/common/ipfilter.cpp Thu Jan 02 16:18:35 2014 +0530<br>
+++ b/source/common/ipfilter.cpp Fri Jan 03 18:14:34 2014 +0530<br>
@@ -449,74 +449,108 @@<br>
namespace x265 {<br>
// x265 private namespace<br>
<br>
-#define CHROMA(W, H) \<br>
+#define CHROMA_420(W, H) \<br>
p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \<br>
p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \<br>
- p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \<br>
- p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \<br>
- p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \<br>
+ p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \<br>
+ p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \<br>
+ p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \<br>
p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;<br></blockquote><div><br></div><div>it's preferable to put white-space changes into their own patch</div><div> </div>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
+#define CHROMA_444(W, H) \<br>
+ p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \<br>
+ p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \<br>
+ p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \<br>
+ p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \<br>
+ p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \<br>
+ p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;<br></blockquote><div><br></div><div>4:4:4 uses 4-tap filters for chroma?</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
#define LUMA(W, H) \<br>
p.luma_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<8, W, H>; \<br>
p.luma_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<8, W, H>; \<br>
- p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \<br>
- p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \<br>
- p.luma_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<8, W, H>; \<br>
- p.luma_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<8, W, H>; \<br>
+ p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \<br>
+ p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \<br>
+ p.luma_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<8, W, H>; \<br>
+ p.luma_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<8, W, H>; \<br>
p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_hv_pp_c<8, W, H>;<br>
<br>
void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)<br>
{<br>
LUMA(4, 4);<br>
LUMA(8, 8);<br>
- CHROMA(4, 4);<br>
+ CHROMA_420(4, 4);<br>
LUMA(4, 8);<br>
- CHROMA(2, 4);<br>
+ CHROMA_420(2, 4);<br>
LUMA(8, 4);<br>
- CHROMA(4, 2);<br>
+ CHROMA_420(4, 2);<br>
LUMA(16, 16);<br>
- CHROMA(8, 8);<br>
+ CHROMA_420(8, 8);<br>
LUMA(16, 8);<br>
- CHROMA(8, 4);<br>
+ CHROMA_420(8, 4);<br>
LUMA(8, 16);<br>
- CHROMA(4, 8);<br>
+ CHROMA_420(4, 8);<br>
LUMA(16, 12);<br>
- CHROMA(8, 6);<br>
+ CHROMA_420(8, 6);<br>
LUMA(12, 16);<br>
- CHROMA(6, 8);<br>
+ CHROMA_420(6, 8);<br>
LUMA(16, 4);<br>
- CHROMA(8, 2);<br>
+ CHROMA_420(8, 2);<br>
LUMA(4, 16);<br>
- CHROMA(2, 8);<br>
+ CHROMA_420(2, 8);<br>
LUMA(32, 32);<br>
- CHROMA(16, 16);<br>
+ CHROMA_420(16, 16);<br>
LUMA(32, 16);<br>
- CHROMA(16, 8);<br>
+ CHROMA_420(16, 8);<br>
LUMA(16, 32);<br>
- CHROMA(8, 16);<br>
+ CHROMA_420(8, 16);<br>
LUMA(32, 24);<br>
- CHROMA(16, 12);<br>
+ CHROMA_420(16, 12);<br>
LUMA(24, 32);<br>
- CHROMA(12, 16);<br>
+ CHROMA_420(12, 16);<br>
LUMA(32, 8);<br>
- CHROMA(16, 4);<br>
+ CHROMA_420(16, 4);<br>
LUMA(8, 32);<br>
- CHROMA(4, 16);<br>
+ CHROMA_420(4, 16);<br>
LUMA(64, 64);<br>
- CHROMA(32, 32);<br>
+ CHROMA_420(32, 32);<br>
LUMA(64, 32);<br>
- CHROMA(32, 16);<br>
+ CHROMA_420(32, 16);<br>
LUMA(32, 64);<br>
- CHROMA(16, 32);<br>
+ CHROMA_420(16, 32);<br>
LUMA(64, 48);<br>
- CHROMA(32, 24);<br>
+ CHROMA_420(32, 24);<br>
LUMA(48, 64);<br>
- CHROMA(24, 32);<br>
+ CHROMA_420(24, 32);<br>
LUMA(64, 16);<br>
- CHROMA(32, 8);<br>
+ CHROMA_420(32, 8);<br>
LUMA(16, 64);<br>
- CHROMA(8, 32);<br>
+ CHROMA_420(8, 32);<br>
+<br>
+ CHROMA_444(4, 4);<br>
+ CHROMA_444(8, 8);<br>
+ CHROMA_444(4, 8);<br>
+ CHROMA_444(8, 4);<br>
+ CHROMA_444(16, 16);<br>
+ CHROMA_444(16, 8);<br>
+ CHROMA_444(8, 16);<br>
+ CHROMA_444(16, 12);<br>
+ CHROMA_444(12, 16);<br>
+ CHROMA_444(16, 4);<br>
+ CHROMA_444(4, 16);<br>
+ CHROMA_444(32, 32);<br>
+ CHROMA_444(32, 16);<br>
+ CHROMA_444(16, 32);<br>
+ CHROMA_444(32, 24);<br>
+ CHROMA_444(24, 32);<br>
+ CHROMA_444(32, 8);<br>
+ CHROMA_444(8, 32);<br>
+ CHROMA_444(64, 64);<br>
+ CHROMA_444(64, 32);<br>
+ CHROMA_444(32, 64);<br>
+ CHROMA_444(64, 48);<br>
+ CHROMA_444(48, 64);<br>
+ CHROMA_444(64, 16);<br>
+ CHROMA_444(16, 64);<br>
<br>
p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_ps_c<8>;<br>
p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_ps_c<4>;<br>
@@ -525,7 +559,9 @@<br>
<br>
p.chroma_vsp = filterVertical_sp_c<4>;<br>
p.luma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;<br>
- p.chroma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;<br>
+<br>
+ p.chroma_p2s[X265_CSP_I444] = filterConvertPelToShort_c<MAX_CU_SIZE>;<br>
+ p.chroma_p2s[X265_CSP_I420] = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;<br>
<br>
p.extendRowBorder = extendCURowColBorder;<br>
}<br>
diff -r 8137881d4cad -r 019ad3c515b3 source/common/pixel.cpp<br>
--- a/source/common/pixel.cpp Thu Jan 02 16:18:35 2014 +0530<br>
+++ b/source/common/pixel.cpp Fri Jan 03 18:14:34 2014 +0530<br>
@@ -805,6 +805,27 @@<br>
namespace x265 {<br>
// x265 private namespace<br>
<br>
+#define CHROMA_420(W, H) \<br>
+ p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
+ p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
+ p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \<br>
+ p.chroma[X265_CSP_I420].sub_ps [CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \<br>
+ p.chroma[X265_CSP_I420].add_ps [CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;<br>
+<br>
+#define CHROMA_444(W, H) \<br>
+ p.chroma[X265_CSP_I444].copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
+ p.chroma[X265_CSP_I444].copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
+ p.chroma[X265_CSP_I444].copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \<br>
+ p.chroma[X265_CSP_I444].sub_ps [LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \<br>
+ p.chroma[X265_CSP_I444].add_ps [LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;<br>
+<br>
+#define LUMA(W, H) \<br>
+ p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
+ p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
+ p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \<br>
+ p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \<br>
+ p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;<br>
+<br>
/* It should initialize entries for pixel functions defined in this file. */<br>
void Setup_C_PixelPrimitives(EncoderPrimitives &p)<br>
{<br>
@@ -840,69 +861,81 @@<br>
p.satd[LUMA_64x16] = satd8<64, 16>;<br>
p.satd[LUMA_16x64] = satd8<16, 64>;<br>
<br>
-#define CHROMA(W, H) \<br>
- p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
- p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
- p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \<br>
- p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \<br>
- p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;<br>
-<br>
-#define LUMA(W, H) \<br>
- p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
- p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
- p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \<br>
- p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \<br>
- p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;<br>
-<br>
LUMA(4, 4);<br>
LUMA(8, 8);<br>
- CHROMA(4, 4);<br>
+ CHROMA_420(4, 4);<br>
LUMA(4, 8);<br>
- CHROMA(2, 4);<br>
+ CHROMA_420(2, 4);<br>
LUMA(8, 4);<br>
- CHROMA(4, 2);<br>
+ CHROMA_420(4, 2);<br>
LUMA(16, 16);<br>
- CHROMA(8, 8);<br>
+ CHROMA_420(8, 8);<br>
LUMA(16, 8);<br>
- CHROMA(8, 4);<br>
+ CHROMA_420(8, 4);<br>
LUMA(8, 16);<br>
- CHROMA(4, 8);<br>
+ CHROMA_420(4, 8);<br>
LUMA(16, 12);<br>
- CHROMA(8, 6);<br>
+ CHROMA_420(8, 6);<br>
LUMA(12, 16);<br>
- CHROMA(6, 8);<br>
+ CHROMA_420(6, 8);<br>
LUMA(16, 4);<br>
- CHROMA(8, 2);<br>
+ CHROMA_420(8, 2);<br>
LUMA(4, 16);<br>
- CHROMA(2, 8);<br>
+ CHROMA_420(2, 8);<br>
LUMA(32, 32);<br>
- CHROMA(16, 16);<br>
+ CHROMA_420(16, 16);<br>
LUMA(32, 16);<br>
- CHROMA(16, 8);<br>
+ CHROMA_420(16, 8);<br>
LUMA(16, 32);<br>
- CHROMA(8, 16);<br>
+ CHROMA_420(8, 16);<br>
LUMA(32, 24);<br>
- CHROMA(16, 12);<br>
+ CHROMA_420(16, 12);<br>
LUMA(24, 32);<br>
- CHROMA(12, 16);<br>
+ CHROMA_420(12, 16);<br>
LUMA(32, 8);<br>
- CHROMA(16, 4);<br>
+ CHROMA_420(16, 4);<br>
LUMA(8, 32);<br>
- CHROMA(4, 16);<br>
+ CHROMA_420(4, 16);<br>
LUMA(64, 64);<br>
- CHROMA(32, 32);<br>
+ CHROMA_420(32, 32);<br>
LUMA(64, 32);<br>
- CHROMA(32, 16);<br>
+ CHROMA_420(32, 16);<br>
LUMA(32, 64);<br>
- CHROMA(16, 32);<br>
+ CHROMA_420(16, 32);<br>
LUMA(64, 48);<br>
- CHROMA(32, 24);<br>
+ CHROMA_420(32, 24);<br>
LUMA(48, 64);<br>
- CHROMA(24, 32);<br>
+ CHROMA_420(24, 32);<br>
LUMA(64, 16);<br>
- CHROMA(32, 8);<br>
+ CHROMA_420(32, 8);<br>
LUMA(16, 64);<br>
- CHROMA(8, 32);<br>
+ CHROMA_420(8, 32);<br>
+<br>
+ CHROMA_444(4, 4);<br>
+ CHROMA_444(8, 8);<br>
+ CHROMA_444(4, 8);<br>
+ CHROMA_444(8, 4);<br>
+ CHROMA_444(16, 16);<br>
+ CHROMA_444(16, 8);<br>
+ CHROMA_444(8, 16);<br>
+ CHROMA_444(16, 12);<br>
+ CHROMA_444(12, 16);<br>
+ CHROMA_444(16, 4);<br>
+ CHROMA_444(4, 16);<br>
+ CHROMA_444(32, 32);<br>
+ CHROMA_444(32, 16);<br>
+ CHROMA_444(16, 32);<br>
+ CHROMA_444(32, 24);<br>
+ CHROMA_444(24, 32);<br>
+ CHROMA_444(32, 8);<br>
+ CHROMA_444(8, 32);<br>
+ CHROMA_444(64, 64);<br>
+ CHROMA_444(64, 32);<br>
+ CHROMA_444(32, 64);<br>
+ CHROMA_444(64, 48);<br>
+ CHROMA_444(48, 64);<br>
+ CHROMA_444(64, 16);<br>
+ CHROMA_444(16, 64);<br></blockquote><div><br></div><div>There's no need to set up 4:4:4 pixel primitives here if they are exact copies of the luma functions. This only makes the testbench take longer.</div><div>
</div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
SET_FUNC_PRIMITIVE_TABLE_C(sse_pp, sse, pixelcmp_t, pixel, pixel)<br>
SET_FUNC_PRIMITIVE_TABLE_C(sse_sp, sse, pixelcmp_sp_t, int16_t, pixel)<br>
diff -r 8137881d4cad -r 019ad3c515b3 source/common/primitives.h<br>
--- a/source/common/primitives.h Thu Jan 02 16:18:35 2014 +0530<br>
+++ b/source/common/primitives.h Fri Jan 03 18:14:34 2014 +0530<br>
@@ -75,7 +75,7 @@<br>
// 4:2:0 chroma partition sizes. These enums are just a convenience for indexing into the<br>
// chroma primitive arrays when instantiating templates. The function tables should always<br>
// be indexed by the luma partition enum<br>
-enum Chroma420Partions<br>
+enum Chroma420Partitions<br>
{<br>
CHROMA_2x2, // never used by HEVC<br>
CHROMA_4x4, CHROMA_4x2, CHROMA_2x4,<br>
@@ -240,7 +240,7 @@<br>
ipfilter_ps_t ipfilter_ps[NUM_IPFILTER_P_S];<br>
ipfilter_ss_t ipfilter_ss[NUM_IPFILTER_S_S];<br>
filter_p2s_t luma_p2s;<br>
- filter_p2s_t chroma_p2s;<br>
+ filter_p2s_t chroma_p2s[NUM_CHROMA_PARTITIONS];<br></blockquote><div><br></div><div>chroma_p2s should be moved into the per-colorspace struct</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
ipfilter_sp_t chroma_vsp;<br>
<br>
weightp_sp_t weight_sp;<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>