[x265] [PATCH] primitives: added C primitives for upShift/downShift input pixels
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Thu Mar 13 11:45:33 CET 2014
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1394693311 -19800
# Thu Mar 13 12:18:31 2014 +0530
# Node ID 481bca8b54d000d1d5fd2bcff242e5d97b7551e7
# Parent 5328eec595543c1294cb34b133b4e36f14e2bb79
primitives: added C primitives for upShift/downShift input pixels
diff -r 5328eec59554 -r 481bca8b54d0 source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp Wed Mar 12 16:01:25 2014 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp Thu Mar 13 12:18:31 2014 +0530
@@ -169,11 +169,11 @@
int height = m_picHeight - pady;
/* internal pad to multiple of 16x16 blocks */
- uint8_t rem = width & 15;
+ uint8_t rem = height & 15;
+ pady = rem ? 16 - rem : pady;
+ rem = width & 15;
padx = rem ? 16 - rem : padx;
- rem = height & 15;
- pady = rem ? 16 - rem : pady;
/* add one more row and col of pad for downscale interpolation, fixes
* warnings from valgrind about using uninitialized pixels */
@@ -193,29 +193,44 @@
uint8_t *uChar = (uint8_t*)pic.planes[1];
uint8_t *vChar = (uint8_t*)pic.planes[2];
- for (int r = 0; r < height; r++)
+ int lumaWidth = width - rem;
+ int chromaWidth = width >> m_hChromaShift;
+ uint8_t chromaRem = chromaWidth & 15;
+ chromaWidth = chromaWidth - chromaRem;
+
+ primitives.upShift(yChar, pic.stride[0] / sizeof(*yChar), yPixel, getStride(), lumaWidth, height);
+ primitives.upShift(uChar, pic.stride[1] / sizeof(*uChar), uPixel, getCStride(), chromaWidth, height >> m_vChromaShift);
+ primitives.upShift(vChar, pic.stride[2] / sizeof(*vChar), vPixel, getCStride(), chromaWidth, height >> m_vChromaShift);
+
+ if (rem)
{
- for (int c = 0; c < width; c++)
+ for (int r = 0; r < height; r++)
{
- yPixel[c] = ((pixel)yChar[c]) << 2;
+ for (int c = lumaWidth; c < width; c++)
+ {
+ yPixel[c] = ((pixel)yChar[c]) << 2;
+ }
+
+ yPixel += getStride();
+ yChar += pic.stride[0] / sizeof(*yChar);
}
-
- yPixel += getStride();
- yChar += pic.stride[0] / sizeof(*yChar);
}
- for (int r = 0; r < height >> m_vChromaShift; r++)
+ if (chromaRem)
{
- for (int c = 0; c < width >> m_hChromaShift; c++)
+ for (int r = 0; r < height >> m_vChromaShift; r++)
{
- uPixel[c] = ((pixel)uChar[c]) << 2;
- vPixel[c] = ((pixel)vChar[c]) << 2;
+ for (int c = chromaWidth; c < width >> m_hChromaShift; c++)
+ {
+ uPixel[c] = ((pixel)uChar[c]) << 2;
+ vPixel[c] = ((pixel)vChar[c]) << 2;
+ }
+
+ uPixel += getCStride();
+ vPixel += getCStride();
+ uChar += pic.stride[1] / sizeof(*uChar);
+ vChar += pic.stride[2] / sizeof(*vChar);
}
-
- uPixel += getCStride();
- vPixel += getCStride();
- uChar += pic.stride[1] / sizeof(*uChar);
- vChar += pic.stride[2] / sizeof(*vChar);
}
}
else if (pic.bitDepth == 8)
@@ -266,31 +281,48 @@
/* defensive programming, mask off bits that are supposed to be zero */
uint16_t mask = (1 << X265_DEPTH) - 1;
int shift = X265_MAX(0, pic.bitDepth - X265_DEPTH);
+ int lumaWidth = width - rem;
+ int chromaWidth = width >> m_hChromaShift;
+ uint8_t chromaRem = chromaWidth & 15;
+ chromaWidth = chromaWidth - chromaRem;
/* shift and mask pixels to final size */
- for (int r = 0; r < height; r++)
+
+ primitives.downShift(yShort, pic.stride[0] / sizeof(*yShort), yPixel, getStride(), lumaWidth, height, shift, mask);
+ primitives.downShift(uShort, pic.stride[1] / sizeof(*uShort), uPixel, getCStride(), chromaWidth, height >> m_vChromaShift, shift, mask);
+ primitives.downShift(vShort, pic.stride[2] / sizeof(*vShort), vPixel, getCStride(), chromaWidth, height >> m_vChromaShift, shift, mask);
+
+ /* Handles the remaining part of the luma component if the width is not a multiple of 16 */
+ if (rem)
{
- for (int c = 0; c < width; c++)
+ for (int r = 0; r < height; r++)
{
- yPixel[c] = (pixel)((yShort[c] >> shift) & mask);
+ for (int c = lumaWidth; c < width; c++)
+ {
+ yPixel[c] = (pixel)((yShort[c] >> shift) & mask);
+ }
+
+ yPixel += getStride();
+ yShort += pic.stride[0] / sizeof(*yShort);
}
-
- yPixel += getStride();
- yShort += pic.stride[0] / sizeof(*yShort);
}
- for (int r = 0; r < height >> m_vChromaShift; r++)
+ /* Handles the remaining part of the chroma components if the chroma width is not a multiple of 16 */
+ if (chromaRem)
{
- for (int c = 0; c < width >> m_hChromaShift; c++)
+ for (int r = 0; r < height >> m_vChromaShift; r++)
{
- uPixel[c] = (pixel)((uShort[c] >> shift) & mask);
- vPixel[c] = (pixel)((vShort[c] >> shift) & mask);
+ for (int c = chromaWidth; c < width >> m_hChromaShift; c++)
+ {
+ uPixel[c] = (pixel)((uShort[c] >> shift) & mask);
+ vPixel[c] = (pixel)((vShort[c] >> shift) & mask);
+ }
+
+ uPixel += getCStride();
+ vPixel += getCStride();
+ uShort += pic.stride[1] / sizeof(*uShort);
+ vShort += pic.stride[2] / sizeof(*vShort);
}
-
- uPixel += getCStride();
- vPixel += getCStride();
- uShort += pic.stride[1] / sizeof(*uShort);
- vShort += pic.stride[2] / sizeof(*vShort);
}
}
diff -r 5328eec59554 -r 481bca8b54d0 source/common/pixel.cpp
--- a/source/common/pixel.cpp Wed Mar 12 16:01:25 2014 -0500
+++ b/source/common/pixel.cpp Thu Mar 13 12:18:31 2014 +0530
@@ -852,6 +852,34 @@
dst += dstStride;
}
}
+
+void upShift(uint8_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height) /* copy a width x height plane of 8-bit samples into pixel storage, scaling each sample up */
+{ /* srcStride and dstStride are counted in elements, not bytes: they are added directly to the typed pointers below */
+ for (int r = 0; r < height; r++)
+ {
+ for (int c = 0; c < width; c++)
+ {
+ dst[c] = ((pixel)src[c]) << 2; /* fixed left shift by 2 (x4); presumably 8-bit input to a 10-bit internal depth -- TODO confirm against X265_DEPTH */
+ }
+
+ dst += dstStride; /* step both pointers to the start of the next row */
+ src += srcStride;
+ }
+}
+
+void downShift(uint16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask) /* copy a width x height plane of 16-bit samples into pixel storage, right-shifting and masking each sample */
+{ /* srcStride and dstStride are counted in elements, not bytes: they are added directly to the typed pointers below */
+ for (int r = 0; r < height; r++)
+ {
+ for (int c = 0; c < width; c++)
+ {
+ dst[c] = (pixel)((src[c] >> shift) & mask); /* drop the low `shift` bits, then keep only the bits set in `mask` before narrowing to pixel */
+ }
+
+ dst += dstStride; /* step both pointers to the start of the next row */
+ src += srcStride;
+ }
+}
} // end anonymous namespace
namespace x265 {
@@ -1099,5 +1127,7 @@
p.var[BLOCK_32x32] = pixel_var<32>;
p.var[BLOCK_64x64] = pixel_var<64>;
p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
+ p.upShift = upShift;
+ p.downShift = downShift;
}
}
diff -r 5328eec59554 -r 481bca8b54d0 source/common/primitives.h
--- a/source/common/primitives.h Wed Mar 12 16:01:25 2014 -0500
+++ b/source/common/primitives.h Thu Mar 13 12:18:31 2014 +0530
@@ -163,6 +163,8 @@
typedef void (*addAvg_t)(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride);
typedef void (*saoCuOrgE0_t)(pixel * rec, int8_t * offsetEo, int lcuWidth, int8_t signLeft);
+typedef void (*planecopy_cp) (uint8_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height);
+typedef void (*planecopy_sp) (uint16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
/* Define a structure containing function pointers to optimized encoder
* primitives. Each pointer can reference either an assembly routine,
@@ -233,6 +235,8 @@
extendCURowBorder_t extendRowBorder;
// sao primitives
saoCuOrgE0_t saoCuOrgE0;
+ planecopy_cp upShift;
+ planecopy_sp downShift;
struct
{
More information about the x265-devel
mailing list