<div dir="ltr"><div># HG changeset patch</div><div># User Deepthi Nandakumar <<a href="mailto:deepthi@multicorewareinc.com">deepthi@multicorewareinc.com</a>></div><div># Date 1456592330 -19800</div><div># Sat Feb 27 22:28:50 2016 +0530</div><div># Node ID 631fe6745d9deaab1577b769c6e8feb79ca084da</div><div># Parent 107d56fa9b06ae9d240b0608d38d403ccc974b7a</div><div>primitives: change planeClipAndMax to calcStats_HDR, add YUV to RGB conversions</div><div><br></div><div>diff -r 107d56fa9b06 -r 631fe6745d9d source/common/constants.cpp</div><div>--- a/source/common/constants.cpp<span class="" style="white-space:pre"> </span>Sun Feb 28 14:22:40 2016 +0530</div><div>+++ b/source/common/constants.cpp<span class="" style="white-space:pre"> </span>Sat Feb 27 22:28:50 2016 +0530</div><div>@@ -568,4 +568,11 @@</div><div> { 42, 43, 46, 47, 58, 59, 62, 63, }</div><div> };</div><div> </div><div>+const double g_YUVtoRGB_BT2020[3][3] = </div><div>+{</div><div>+ { 1.00, 0.00, 1.47460, },</div><div>+ { 1.00, -0.16455, -0.57135, },</div><div>+ { 1.00, 1.88140, 0.00, }</div><div>+};</div><div>+</div><div> }</div><div>diff -r 107d56fa9b06 -r 631fe6745d9d source/common/constants.h</div><div>--- a/source/common/constants.h<span class="" style="white-space:pre"> </span>Sun Feb 28 14:22:40 2016 +0530</div><div>+++ b/source/common/constants.h<span class="" style="white-space:pre"> </span>Sat Feb 27 22:28:50 2016 +0530</div><div>@@ -98,6 +98,8 @@</div><div> </div><div> extern const uint32_t g_depthScanIdx[8][8];</div><div> </div><div>+extern const double g_YUVtoRGB_BT2020[3][3];</div><div>+</div><div> }</div><div> </div><div> #endif</div><div>diff -r 107d56fa9b06 -r 631fe6745d9d source/common/picyuv.cpp</div><div>--- a/source/common/picyuv.cpp<span class="" style="white-space:pre"> </span>Sun Feb 28 14:22:40 2016 +0530</div><div>+++ b/source/common/picyuv.cpp<span class="" style="white-space:pre"> </span>Sat Feb 27 22:28:50 2016 +0530</div><div>@@ -46,6 +46,8 @@</div><div> </div><div> m_maxLumaLevel = 0;</div><div> m_avgLumaLevel = 0;</div><div>+ m_stride = 0;</div><div>+ m_strideC = 0;</div><div> }</div><div> </div><div> bool PicYuv::create(uint32_t picWidth, uint32_t picHeight, uint32_t picCsp)</div><div>@@ -283,11 +285,13 @@</div><div> pixel *U = m_picOrg[1];</div><div> pixel *V = m_picOrg[2];</div><div> </div><div>+ bool calcHDRParams = !!param.maxLuma || !!param.minLuma || !!param.maxCLL;</div><div> /* Apply min/max luma bounds and calculate max and avg luma levels for HDR SEI messages */</div><div>- if (!!param.maxLuma || !!param.minLuma || !!param.maxCLL)</div><div>+ if (calcHDRParams)</div><div> {</div><div>+ X265_CHECK(pic.bitDepth == 10, "HDR stats can be applied/calculated only for 10bpp content");</div><div> uint64_t sumLuma;</div><div>- m_maxLumaLevel = primitives.planeClipAndMax(Y, m_stride, width, height, &sumLuma, (pixel)param.minLuma, (pixel)param.maxLuma);</div><div>+ primitives.calcHDRStats(Y, U, V, m_stride, m_strideC, width, height, &sumLuma, &m_maxLumaLevel, (pixel)param.minLuma, (pixel)param.maxLuma, m_hChromaShift, m_vChromaShift);</div><div> m_avgLumaLevel = (double)(sumLuma) / (m_picHeight * m_picWidth);</div><div> }</div><div> </div><div>diff -r 107d56fa9b06 -r 631fe6745d9d source/common/picyuv.h</div><div>--- a/source/common/picyuv.h<span class="" style="white-space:pre"> </span>Sun Feb 28 14:22:40 2016 +0530</div><div>+++ b/source/common/picyuv.h<span class="" style="white-space:pre"> </span>Sat Feb 27 22:28:50 2016 +0530</div><div>@@ -60,7 +60,7 @@</div><div> uint32_t m_chromaMarginX;</div><div> uint32_t m_chromaMarginY;</div><div> </div><div>- uint16_t m_maxLumaLevel;</div><div>+ pixel m_maxLumaLevel;</div><div> double m_avgLumaLevel;</div><div> </div><div> PicYuv();</div><div>diff -r 107d56fa9b06 -r 631fe6745d9d source/common/pixel.cpp</div><div>--- a/source/common/pixel.cpp<span class="" style="white-space:pre"> </span>Sun Feb 28 14:22:40 2016 +0530</div><div>+++ b/source/common/pixel.cpp<span class="" style="white-space:pre"> </span>Sat Feb 27 22:28:50 2016 +0530</div><div>@@ -873,28 +873,86 @@</div><div> }</div><div> }</div><div> </div><div>-static pixel planeClipAndMax_c(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix)</div><div>+static void calcHDRStats_c(pixel *srcY, pixel* srcU, pixel* srcV, intptr_t stride, intptr_t strideC, int width, int height, uint64_t *outsum, </div><div>+ pixel *outMax, const pixel minPix, const pixel maxPix, const int hShift, const int vShift)</div><div> {</div><div> pixel maxLumaLevel = 0;</div><div> uint64_t sumLuma = 0;</div><div>+ pixel rgb[3];</div><div> </div><div>- for (int r = 0; r < height; r++)</div><div>+ if (!hShift && !vShift) /* YUV444 */</div><div> {</div><div>- for (int c = 0; c < width; c++)</div><div>+ for (int r = 0; r < height; r++)</div><div> {</div><div>- /* Clip luma of source picture to max and min values before extending edges of picYuv */</div><div>- src[c] = x265_clip3((pixel)minPix, (pixel)maxPix, src[c]);</div><div>+ for (int c = 0; c < width; c++)</div><div>+ {</div><div>+ /* Clip luma of source picture to max and min */</div><div>+ srcY[c] = x265_clip3((pixel)minPix, (pixel)maxPix, srcY[c]);</div><div> </div><div>- /* Determine maximum and average luma level in a picture */</div><div>- maxLumaLevel = X265_MAX(src[c], maxLumaLevel);</div><div>- sumLuma += src[c];</div><div>+ /* Rec 2020 Yuv to RGB */</div><div>+ for (int i = 0; i < 3; i++)</div><div>+ rgb[i] = (pixel) (srcY[c] * g_YUVtoRGB_BT2020[i][0] + srcU[c] * g_YUVtoRGB_BT2020[i][1] + srcV[c] * g_YUVtoRGB_BT2020[i][2]);</div><div>+ /* maxCLL and maxFALL */</div><div>+ maxLumaLevel = X265_MAX(maxLumaLevel, X265_MAX(rgb[0], X265_MAX(rgb[1], rgb[2])));</div><div>+ sumLuma += maxLumaLevel;</div><div>+ }</div><div>+ srcY += stride; srcU += strideC; srcV += strideC;</div><div> }</div><div>-</div><div>- src += stride;</div><div> }</div><div>-</div><div>+ else if (hShift && !vShift) /* YUV422 */</div><div>+ {</div><div>+ for (int r = 0; r < height; r++)</div><div>+ {</div><div>+ for (int c = 0; c < width >> hShift; c++)</div><div>+ {</div><div>+ srcY[2*c] = x265_clip3((pixel)minPix, (pixel)maxPix, srcY[2*c]);</div><div>+ srcY[2*c + 1] = x265_clip3((pixel)minPix, (pixel)maxPix, srcY[2*c + 1]);</div><div>+ pixel y = (srcY[2*c] + srcY[2*c + 1]) >> 1;</div><div>+ for (int i = 0; i < 3; i++)</div><div>+ rgb[i] = (pixel)(y * g_YUVtoRGB_BT2020[i][0] + srcU[c] * g_YUVtoRGB_BT2020[i][1] + srcV[c] * g_YUVtoRGB_BT2020[i][2]);</div><div>+ maxLumaLevel = X265_MAX(maxLumaLevel, X265_MAX(rgb[0], X265_MAX(rgb[1], rgb[2])));</div><div>+ sumLuma += maxLumaLevel;</div><div>+ }</div><div>+ srcY += stride; srcU += strideC; srcV += strideC;</div><div>+ }</div><div>+ }</div><div>+ else if (hShift && vShift) /* YUV420 */</div><div>+ {</div><div>+ for (int r = 0; r < height >> vShift; r++)</div><div>+ {</div><div>+ for (int c = 0; c < width >> vShift; c++)</div><div>+ {</div><div>+ srcY[2*c] = x265_clip3((pixel)minPix, (pixel)maxPix, srcY[2*c]);</div><div>+ srcY[2*c + 1] = x265_clip3((pixel)minPix, (pixel)maxPix, srcY[2*c + 1]);</div><div>+ srcY[stride + 2*c] = x265_clip3((pixel)minPix, (pixel)maxPix, srcY[stride + 2*c]);</div><div>+ srcY[stride + 2*c + 1] = x265_clip3((pixel)minPix, (pixel)maxPix, srcY[stride + 2*c + 1]);</div><div>+ pixel y = (srcY[2*c] + srcY[2*c + 1] + srcY[stride + 2*c] + srcY[stride + 2*c + 1]) >> 2;</div><div>+ for (int i = 0; i < 3; i++)</div><div>+ rgb[i] = (pixel) (y * g_YUVtoRGB_BT2020[i][0] + srcU[c] * g_YUVtoRGB_BT2020[i][1] + srcV[c] * g_YUVtoRGB_BT2020[i][2]);</div><div>+ maxLumaLevel = X265_MAX(maxLumaLevel, X265_MAX(rgb[0], X265_MAX(rgb[1], rgb[2])));</div><div>+ sumLuma += maxLumaLevel;</div><div>+ }</div><div>+ srcY += (stride << 1); srcU += strideC; srcV += strideC;</div><div>+ }</div><div>+ }</div><div>+ else if (!strideC) /* YUV400 */</div><div>+ {</div><div>+ for (int r = 0; r < height; r++)</div><div>+ {</div><div>+ for (int c = 0; c < width; c++)</div><div>+ {</div><div>+ srcY[c] = x265_clip3((pixel)minPix, (pixel)maxPix, srcY[c]);</div><div>+ for (int i = 0; i < 3; i++)</div><div>+ rgb[i] = (pixel) (srcY[c] * g_YUVtoRGB_BT2020[i][0]);</div><div>+ /* maxCLL and maxFALL */</div><div>+ maxLumaLevel = X265_MAX(maxLumaLevel, X265_MAX(rgb[0], X265_MAX(rgb[1], rgb[2])));</div><div>+ sumLuma += maxLumaLevel;</div><div>+ }</div><div>+ srcY += stride;</div><div>+ }</div><div>+ }</div><div> *outsum = sumLuma;</div><div>- return maxLumaLevel;</div><div>+ *outMax = maxLumaLevel;</div><div> }</div><div> </div><div> } // end anonymous namespace</div><div>@@ -1181,7 +1239,7 @@</div><div> p.planecopy_cp = planecopy_cp_c;</div><div> p.planecopy_sp = planecopy_sp_c;</div><div> p.planecopy_sp_shl = planecopy_sp_shl_c;</div><div>- p.planeClipAndMax = planeClipAndMax_c;</div><div>+ p.calcHDRStats = calcHDRStats_c;</div><div> p.propagateCost = estimateCUPropagateCost;</div><div> }</div><div> }</div><div>diff -r 107d56fa9b06 -r 631fe6745d9d source/common/primitives.h</div><div>--- a/source/common/primitives.h<span class="" style="white-space:pre"> </span>Sun Feb 28 14:22:40 2016 +0530</div><div>+++ b/source/common/primitives.h<span class="" style="white-space:pre"> </span>Sat Feb 27 22:28:50 2016 +0530</div><div>@@ -185,7 +185,7 @@</div><div> typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX);</div><div> typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);</div><div> typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);</div><div>-typedef pixel (*planeClipAndMax_t)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix);</div><div>+typedef void (*calcHDRStats_t)(pixel *srcY, pixel* srcU, pixel* srcV, intptr_t stride, intptr_t strideC, int width, int height, uint64_t *outsum, pixel *outMax, const pixel minPix, const pixel maxPix, const int hShift, const int vShift);</div><div> </div><div> typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len);</div><div> </div><div>@@ -318,7 +318,7 @@</div><div> planecopy_cp_t planecopy_cp;</div><div> planecopy_sp_t planecopy_sp;</div><div> planecopy_sp_t planecopy_sp_shl;</div><div>- planeClipAndMax_t planeClipAndMax;</div><div>+ calcHDRStats_t calcHDRStats;</div><div> </div><div> weightp_sp_t weight_sp;</div><div> weightp_pp_t weight_pp;</div><div>diff -r 107d56fa9b06 -r 631fe6745d9d source/common/x86/asm-primitives.cpp</div><div>--- a/source/common/x86/asm-primitives.cpp<span class="" style="white-space:pre"> </span>Sun Feb 28 14:22:40 2016 +0530</div><div>+++ b/source/common/x86/asm-primitives.cpp<span class="" style="white-space:pre"> </span>Sat Feb 27 22:28:50 2016 +0530</div><div>@@ -3658,7 +3658,6 @@</div><div> p.chroma[X265_CSP_I420].cu[CHROMA_420_32x32].copy_ps = PFX(blockcopy_ps_32x32_avx2);</div><div> p.chroma[X265_CSP_I422].cu[CHROMA_422_32x64].copy_ps = PFX(blockcopy_ps_32x64_avx2);</div><div> <a href="http://p.cu">p.cu</a>[BLOCK_64x64].copy_ps = PFX(blockcopy_ps_64x64_avx2);</div><div>- p.planeClipAndMax = PFX(planeClipAndMax_avx2);</div><div> </div><div> p.pu[LUMA_32x8].sad_x3 = PFX(pixel_sad_x3_32x8_avx2);</div><div> p.pu[LUMA_32x16].sad_x3 = PFX(pixel_sad_x3_32x16_avx2);</div><div>diff -r 107d56fa9b06 -r 631fe6745d9d source/common/x86/pixel.h</div><div>--- a/source/common/x86/pixel.h<span class="" style="white-space:pre"> </span>Sun Feb 28 14:22:40 2016 +0530</div><div>+++ b/source/common/x86/pixel.h<span class="" style="white-space:pre"> </span>Sat Feb 27 22:28:50 2016 +0530</div><div>@@ -36,7 +36,6 @@</div><div> void PFX(upShift_16_avx2)(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);</div><div> void PFX(upShift_8_sse4)(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);</div><div> void PFX(upShift_8_avx2)(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);</div><div>-pixel PFX(planeClipAndMax_avx2)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix);</div><div> </div><div> #define DECL_PIXELS(cpu) \</div><div> FUNCDEF_PU(sse_t, pixel_ssd, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \</div><div>diff -r 107d56fa9b06 -r 631fe6745d9d source/test/pixelharness.cpp</div><div>--- a/source/test/pixelharness.cpp<span class="" style="white-space:pre"> </span>Sun Feb 28 14:22:40 2016 +0530</div><div>+++ b/source/test/pixelharness.cpp<span class="" style="white-space:pre"> </span>Sat Feb 27 22:28:50 2016 +0530</div><div>@@ -1818,34 +1818,6 @@</div><div> return true;</div><div> }</div><div> </div><div>-bool PixelHarness::check_planeClipAndMax(planeClipAndMax_t ref, planeClipAndMax_t opt)</div><div>-{</div><div>- for (int i = 0; i < ITERS; i++)</div><div>- {</div><div>- intptr_t rand_stride = rand() % STRIDE;</div><div>- int rand_width = (rand() % (STRIDE * 2)) + 1;</div><div>- const int rand_height = (rand() % MAX_HEIGHT) + 1;</div><div>- const pixel rand_min = rand() % 32;</div><div>- const pixel rand_max = PIXEL_MAX - (rand() % 32);</div><div>- uint64_t ref_sum, opt_sum;</div><div>-</div><div>- // video width must be more than or equal to 32</div><div>- if (rand_width < 32)</div><div>- rand_width = 32;</div><div>-</div><div>- // stride must be more than or equal to width</div><div>- if (rand_stride < rand_width)</div><div>- rand_stride = rand_width;</div><div>-</div><div>- pixel ref_max = ref(pbuf1, rand_stride, rand_width, rand_height, &ref_sum, rand_min, rand_max);</div><div>- pixel opt_max = (pixel)checked(opt, pbuf1, rand_stride, rand_width, rand_height, &opt_sum, rand_min, rand_max);</div><div>-</div><div>- if (ref_max != opt_max)</div><div>- return false;</div><div>- }</div><div>- return true;</div><div>-}</div><div>-</div><div> bool PixelHarness::check_pelFilterLumaStrong_H(pelFilterLumaStrong_t ref, pelFilterLumaStrong_t opt)</div><div> {</div><div> intptr_t srcStep = 1, offset = 64;</div><div>@@ -2543,15 +2515,6 @@</div><div> }</div><div> </div><div> </div><div>- if (opt.planeClipAndMax)</div><div>- {</div><div>- if (!check_planeClipAndMax(ref.planeClipAndMax, opt.planeClipAndMax))</div><div>- {</div><div>- printf("planeClipAndMax failed!\n");</div><div>- return false;</div><div>- }</div><div>- }</div><div>-</div><div> if (opt.pelFilterLumaStrong[0])</div><div> {</div><div> if (!check_pelFilterLumaStrong_V(ref.pelFilterLumaStrong[0], opt.pelFilterLumaStrong[0]))</div><div>@@ -3047,13 +3010,6 @@</div><div> REPORT_SPEEDUP(opt.costC1C2Flag, ref.costC1C2Flag, abscoefBuf, C1FLAG_NUMBER, (uint8_t*)psbuf1, 1);</div><div> }</div><div> </div><div>- if (opt.planeClipAndMax)</div><div>- {</div><div>- HEADER0("planeClipAndMax");</div><div>- uint64_t dummy;</div><div>- REPORT_SPEEDUP(opt.planeClipAndMax, ref.planeClipAndMax, pbuf1, 128, 63, 62, &dummy, 1, PIXEL_MAX - 1);</div><div>- }</div><div>-</div><div> if (opt.pelFilterLumaStrong[0])</div><div> {</div><div> int32_t tcP = (rand() % PIXEL_MAX) - 1;</div><div>diff -r 107d56fa9b06 -r 631fe6745d9d source/test/pixelharness.h</div><div>--- a/source/test/pixelharness.h<span class="" style="white-space:pre"> </span>Sun Feb 28 14:22:40 2016 +0530</div><div>+++ b/source/test/pixelharness.h<span class="" style="white-space:pre"> </span>Sat Feb 27 22:28:50 2016 +0530</div><div>@@ -120,7 +120,6 @@</div><div> bool check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt);</div><div> bool check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t opt);</div><div> bool check_costC1C2Flag(costC1C2Flag_t ref, costC1C2Flag_t opt);</div><div>- bool check_planeClipAndMax(planeClipAndMax_t ref, planeClipAndMax_t opt);</div><div> bool check_pelFilterLumaStrong_V(pelFilterLumaStrong_t ref, pelFilterLumaStrong_t opt);</div><div> bool check_pelFilterLumaStrong_H(pelFilterLumaStrong_t ref, pelFilterLumaStrong_t opt);</div><div> </div><div><br></div><div><br></div>-- <br><div class="gmail_signature"><div dir="ltr"><div><div>Deepthi Nandakumar<br></div>Engineering Manager, x265<br></div>Multicoreware, Inc<br></div></div>
</div>