[x265] [PATCH] Modify primitives to support multiple color space formats

ashok at multicorewareinc.com ashok at multicorewareinc.com
Fri Jan 3 13:45:01 CET 2014


# HG changeset patch
# User ashok at multicorewareinc.com
# Date 1388753074 -19800
#      Fri Jan 03 18:14:34 2014 +0530
# Node ID 019ad3c515b3219497dfa51bd8f8c3a709b7ec5d
# Parent  8137881d4cad4555e1128320d62dd56dd24ed3dc
Modify primitives to support multiple color space formats

diff -r 8137881d4cad -r 019ad3c515b3 source/common/TShortYUV.h
--- a/source/common/TShortYUV.h	Thu Jan 02 16:18:35 2014 +0530
+++ b/source/common/TShortYUV.h	Fri Jan 03 18:14:34 2014 +0530
@@ -87,9 +87,9 @@
     //  Access starting position of YUV partition unit buffer
     int16_t* getLumaAddr(unsigned int partUnitIdx) { return m_bufY + getAddrOffset(partUnitIdx, m_width); }
 
-    int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb + (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }
+    int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb + (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }
 
-    int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr + (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }
+    int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr + (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }
 
     //  Access starting position of YUV transform unit buffer
     int16_t* getLumaAddr(unsigned int partIdx, unsigned int size) { return m_bufY + getAddrOffset(partIdx, size, m_width); }
diff -r 8137881d4cad -r 019ad3c515b3 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp	Thu Jan 02 16:18:35 2014 +0530
+++ b/source/common/ipfilter.cpp	Fri Jan 03 18:14:34 2014 +0530
@@ -449,74 +449,108 @@
 namespace x265 {
 // x265 private namespace
 
-#define CHROMA(W, H) \
+#define CHROMA_420(W, H) \
     p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
     p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
-    p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
-    p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
-    p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
+    p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>;  \
+    p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>;  \
+    p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>;  \
     p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
 
+#define CHROMA_444(W, H) \
+    p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
+    p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
+    p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>;  \
+    p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>;  \
+    p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>;  \
+    p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
+
 #define LUMA(W, H) \
     p.luma_hpp[LUMA_ ## W ## x ## H]     = interp_horiz_pp_c<8, W, H>; \
     p.luma_hps[LUMA_ ## W ## x ## H]     = interp_horiz_ps_c<8, W, H>; \
-    p.luma_vpp[LUMA_ ## W ## x ## H]     = interp_vert_pp_c<8, W, H>; \
-    p.luma_vps[LUMA_ ## W ## x ## H]     = interp_vert_ps_c<8, W, H>; \
-    p.luma_vsp[LUMA_ ## W ## x ## H]     = interp_vert_sp_c<8, W, H>; \
-    p.luma_vss[LUMA_ ## W ## x ## H]     = interp_vert_ss_c<8, W, H>; \
+    p.luma_vpp[LUMA_ ## W ## x ## H]     = interp_vert_pp_c<8, W, H>;  \
+    p.luma_vps[LUMA_ ## W ## x ## H]     = interp_vert_ps_c<8, W, H>;  \
+    p.luma_vsp[LUMA_ ## W ## x ## H]     = interp_vert_sp_c<8, W, H>;  \
+    p.luma_vss[LUMA_ ## W ## x ## H]     = interp_vert_ss_c<8, W, H>;  \
     p.luma_hvpp[LUMA_ ## W ## x ## H]    = interp_hv_pp_c<8, W, H>;
 
 void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)
 {
     LUMA(4, 4);
     LUMA(8, 8);
-    CHROMA(4, 4);
+    CHROMA_420(4,  4);
     LUMA(4, 8);
-    CHROMA(2, 4);
+    CHROMA_420(2,  4);
     LUMA(8, 4);
-    CHROMA(4, 2);
+    CHROMA_420(4,  2);
     LUMA(16, 16);
-    CHROMA(8, 8);
+    CHROMA_420(8,  8);
     LUMA(16,  8);
-    CHROMA(8, 4);
+    CHROMA_420(8,  4);
     LUMA(8, 16);
-    CHROMA(4, 8);
+    CHROMA_420(4,  8);
     LUMA(16, 12);
-    CHROMA(8, 6);
+    CHROMA_420(8,  6);
     LUMA(12, 16);
-    CHROMA(6, 8);
+    CHROMA_420(6,  8);
     LUMA(16,  4);
-    CHROMA(8, 2);
+    CHROMA_420(8,  2);
     LUMA(4, 16);
-    CHROMA(2, 8);
+    CHROMA_420(2,  8);
     LUMA(32, 32);
-    CHROMA(16, 16);
+    CHROMA_420(16, 16);
     LUMA(32, 16);
-    CHROMA(16, 8);
+    CHROMA_420(16, 8);
     LUMA(16, 32);
-    CHROMA(8, 16);
+    CHROMA_420(8,  16);
     LUMA(32, 24);
-    CHROMA(16, 12);
+    CHROMA_420(16, 12);
     LUMA(24, 32);
-    CHROMA(12, 16);
+    CHROMA_420(12, 16);
     LUMA(32,  8);
-    CHROMA(16, 4);
+    CHROMA_420(16, 4);
     LUMA(8, 32);
-    CHROMA(4, 16);
+    CHROMA_420(4,  16);
     LUMA(64, 64);
-    CHROMA(32, 32);
+    CHROMA_420(32, 32);
     LUMA(64, 32);
-    CHROMA(32, 16);
+    CHROMA_420(32, 16);
     LUMA(32, 64);
-    CHROMA(16, 32);
+    CHROMA_420(16, 32);
     LUMA(64, 48);
-    CHROMA(32, 24);
+    CHROMA_420(32, 24);
     LUMA(48, 64);
-    CHROMA(24, 32);
+    CHROMA_420(24, 32);
     LUMA(64, 16);
-    CHROMA(32, 8);
+    CHROMA_420(32, 8);
     LUMA(16, 64);
-    CHROMA(8, 32);
+    CHROMA_420(8,  32);
+
+    CHROMA_444(4,  4);
+    CHROMA_444(8,  8);
+    CHROMA_444(4,  8);
+    CHROMA_444(8,  4);
+    CHROMA_444(16, 16);
+    CHROMA_444(16, 8);
+    CHROMA_444(8,  16);
+    CHROMA_444(16, 12);
+    CHROMA_444(12, 16);
+    CHROMA_444(16, 4);
+    CHROMA_444(4,  16);
+    CHROMA_444(32, 32);
+    CHROMA_444(32, 16);
+    CHROMA_444(16, 32);
+    CHROMA_444(32, 24);
+    CHROMA_444(24, 32);
+    CHROMA_444(32, 8);
+    CHROMA_444(8,  32);
+    CHROMA_444(64, 64);
+    CHROMA_444(64, 32);
+    CHROMA_444(32, 64);
+    CHROMA_444(64, 48);
+    CHROMA_444(48, 64);
+    CHROMA_444(64, 16);
+    CHROMA_444(16, 64);
 
     p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_ps_c<8>;
     p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_ps_c<4>;
@@ -525,7 +559,9 @@
 
     p.chroma_vsp = filterVertical_sp_c<4>;
     p.luma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;
-    p.chroma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
+
+    p.chroma_p2s[X265_CSP_I444] = filterConvertPelToShort_c<MAX_CU_SIZE>;
+    p.chroma_p2s[X265_CSP_I420] = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
 
     p.extendRowBorder = extendCURowColBorder;
 }
diff -r 8137881d4cad -r 019ad3c515b3 source/common/pixel.cpp
--- a/source/common/pixel.cpp	Thu Jan 02 16:18:35 2014 +0530
+++ b/source/common/pixel.cpp	Fri Jan 03 18:14:34 2014 +0530
@@ -805,6 +805,27 @@
 namespace x265 {
 // x265 private namespace
 
+#define CHROMA_420(W, H) \
+    p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
+    p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
+    p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
+    p.chroma[X265_CSP_I420].sub_ps [CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
+    p.chroma[X265_CSP_I420].add_ps [CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+
+#define CHROMA_444(W, H) \
+    p.chroma[X265_CSP_I444].copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
+    p.chroma[X265_CSP_I444].copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
+    p.chroma[X265_CSP_I444].copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
+    p.chroma[X265_CSP_I444].sub_ps [LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
+    p.chroma[X265_CSP_I444].add_ps [LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+
+#define LUMA(W, H) \
+    p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
+    p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
+    p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
+    p.luma_sub_ps[LUMA_ ## W ## x ## H]  = pixel_sub_ps_c<W, H>; \
+    p.luma_add_ps[LUMA_ ## W ## x ## H]  = pixel_add_ps_c<W, H>;
+
 /* It should initialize entries for pixel functions defined in this file. */
 void Setup_C_PixelPrimitives(EncoderPrimitives &p)
 {
@@ -840,69 +861,81 @@
     p.satd[LUMA_64x16] = satd8<64, 16>;
     p.satd[LUMA_16x64] = satd8<16, 64>;
 
-#define CHROMA(W, H) \
-    p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
-    p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
-    p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
-    p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
-    p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
-
-#define LUMA(W, H) \
-    p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
-    p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
-    p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
-    p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
-    p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
-
     LUMA(4, 4);
     LUMA(8, 8);
-    CHROMA(4, 4);
+    CHROMA_420(4, 4);
     LUMA(4, 8);
-    CHROMA(2, 4);
+    CHROMA_420(2, 4);
     LUMA(8, 4);
-    CHROMA(4, 2);
+    CHROMA_420(4, 2);
     LUMA(16, 16);
-    CHROMA(8, 8);
+    CHROMA_420(8,  8);
     LUMA(16,  8);
-    CHROMA(8, 4);
+    CHROMA_420(8,  4);
     LUMA(8, 16);
-    CHROMA(4, 8);
+    CHROMA_420(4,  8);
     LUMA(16, 12);
-    CHROMA(8, 6);
+    CHROMA_420(8,  6);
     LUMA(12, 16);
-    CHROMA(6, 8);
+    CHROMA_420(6,  8);
     LUMA(16,  4);
-    CHROMA(8, 2);
+    CHROMA_420(8,  2);
     LUMA(4, 16);
-    CHROMA(2, 8);
+    CHROMA_420(2,  8);
     LUMA(32, 32);
-    CHROMA(16, 16);
+    CHROMA_420(16, 16);
     LUMA(32, 16);
-    CHROMA(16, 8);
+    CHROMA_420(16, 8);
     LUMA(16, 32);
-    CHROMA(8, 16);
+    CHROMA_420(8,  16);
     LUMA(32, 24);
-    CHROMA(16, 12);
+    CHROMA_420(16, 12);
     LUMA(24, 32);
-    CHROMA(12, 16);
+    CHROMA_420(12, 16);
     LUMA(32,  8);
-    CHROMA(16, 4);
+    CHROMA_420(16, 4);
     LUMA(8, 32);
-    CHROMA(4, 16);
+    CHROMA_420(4,  16);
     LUMA(64, 64);
-    CHROMA(32, 32);
+    CHROMA_420(32, 32);
     LUMA(64, 32);
-    CHROMA(32, 16);
+    CHROMA_420(32, 16);
     LUMA(32, 64);
-    CHROMA(16, 32);
+    CHROMA_420(16, 32);
     LUMA(64, 48);
-    CHROMA(32, 24);
+    CHROMA_420(32, 24);
     LUMA(48, 64);
-    CHROMA(24, 32);
+    CHROMA_420(24, 32);
     LUMA(64, 16);
-    CHROMA(32, 8);
+    CHROMA_420(32, 8);
     LUMA(16, 64);
-    CHROMA(8, 32);
+    CHROMA_420(8,  32);
+
+    CHROMA_444(4,  4);
+    CHROMA_444(8,  8);
+    CHROMA_444(4,  8);
+    CHROMA_444(8,  4);
+    CHROMA_444(16, 16);
+    CHROMA_444(16, 8);
+    CHROMA_444(8,  16);
+    CHROMA_444(16, 12);
+    CHROMA_444(12, 16);
+    CHROMA_444(16, 4);
+    CHROMA_444(4,  16);
+    CHROMA_444(32, 32);
+    CHROMA_444(32, 16);
+    CHROMA_444(16, 32);
+    CHROMA_444(32, 24);
+    CHROMA_444(24, 32);
+    CHROMA_444(32, 8);
+    CHROMA_444(8,  32);
+    CHROMA_444(64, 64);
+    CHROMA_444(64, 32);
+    CHROMA_444(32, 64);
+    CHROMA_444(64, 48);
+    CHROMA_444(48, 64);
+    CHROMA_444(64, 16);
+    CHROMA_444(16, 64);
 
     SET_FUNC_PRIMITIVE_TABLE_C(sse_pp, sse, pixelcmp_t, pixel, pixel)
     SET_FUNC_PRIMITIVE_TABLE_C(sse_sp, sse, pixelcmp_sp_t, int16_t, pixel)
diff -r 8137881d4cad -r 019ad3c515b3 source/common/primitives.h
--- a/source/common/primitives.h	Thu Jan 02 16:18:35 2014 +0530
+++ b/source/common/primitives.h	Fri Jan 03 18:14:34 2014 +0530
@@ -75,7 +75,7 @@
 // 4:2:0 chroma partition sizes. These enums are just a convenience for indexing into the
 // chroma primitive arrays when instantiating templates. The function tables should always
 // be indexed by the luma partition enum
-enum Chroma420Partions
+enum Chroma420Partitions
 {
     CHROMA_2x2, // never used by HEVC
     CHROMA_4x4,   CHROMA_4x2,   CHROMA_2x4,
@@ -240,7 +240,7 @@
     ipfilter_ps_t   ipfilter_ps[NUM_IPFILTER_P_S];
     ipfilter_ss_t   ipfilter_ss[NUM_IPFILTER_S_S];
     filter_p2s_t    luma_p2s;
-    filter_p2s_t    chroma_p2s;
+    filter_p2s_t    chroma_p2s[NUM_CHROMA_PARTITIONS]; // NOTE(review): ipfilter.cpp indexes this by X265_CSP_I420/X265_CSP_I444 (color space), not by partition -- dimension looks like it should be the color-space count; TODO confirm
     ipfilter_sp_t   chroma_vsp;
 
     weightp_sp_t    weight_sp;


More information about the x265-devel mailing list