[vlc-commits] chroma: copy: add a way to shift bits for 16 bits conversion
Thomas Guillem
git at videolan.org
Fri Mar 16 16:08:15 CET 2018
vlc | branch: master | Thomas Guillem <thomas at gllm.fr> | Fri Mar 16 09:06:33 2018 +0100| [ae93aaa1f36c81faee712557398b84c2148ae746] | committer: Thomas Guillem
chroma: copy: add a way to shift bits for 16 bits conversion
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=ae93aaa1f36c81faee712557398b84c2148ae746
---
modules/hw/vaapi/chroma.c | 4 +-
modules/video_chroma/copy.c | 350 ++++++++++++++++++++++++++++++++------------
modules/video_chroma/copy.h | 8 +-
3 files changed, 264 insertions(+), 98 deletions(-)
diff --git a/modules/hw/vaapi/chroma.c b/modules/hw/vaapi/chroma.c
index 3e5aef35db..5fc965283a 100644
--- a/modules/hw/vaapi/chroma.c
+++ b/modules/hw/vaapi/chroma.c
@@ -110,7 +110,7 @@ FillPictureFromVAImage(picture_t *dest,
break;
case VLC_CODEC_I420_10B:
Copy420_16_SP_to_P(dest, src_planes, src_pitches,
- src_img->height, cache);
+ src_img->height, 0, cache);
break;
default:
vlc_assert_unreachable();
@@ -219,7 +219,7 @@ FillVAImageFromPicture(VAImage *dest_img, uint8_t *dest_buf,
case VLC_CODEC_I420_10B:
assert(dest_pic->format.i_chroma == VLC_CODEC_VAAPI_420_10BPP);
Copy420_16_P_to_SP(dest_pic, src_planes, src_pitches,
- src->format.i_height, cache);
+ src->format.i_height, 0, cache);
break;
case VLC_CODEC_P010:
{
diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c
index de4c241d25..1895218037 100644
--- a/modules/video_chroma/copy.c
+++ b/modules/video_chroma/copy.c
@@ -36,6 +36,9 @@
#include <assert.h>
#include "copy.h"
+static void CopyPlane(uint8_t *dst, size_t dst_pitch,
+ const uint8_t *src, size_t src_pitch,
+ unsigned height, int bitshift);
#define ASSERT_PLANE(i) assert(src[i]); \
assert(src_pitch[i])
@@ -77,24 +80,48 @@ void CopyCleanCache(copy_cache_t *cache)
/* Copy 16/64 bytes from srcp to dstp loading data with the SSE>=2 instruction
* load and storing data with the SSE>=2 instruction store.
*/
-#define COPY16(dstp, srcp, load, store) \
+
+#define COPY16_SHIFTR(x) \
+ "psrlw "x", %%xmm1\n"
+#define COPY16_SHIFTL(x) \
+ "psllw "x", %%xmm1\n"
+
+#define COPY16_S(dstp, srcp, load, store, shiftstr) \
asm volatile ( \
load " 0(%[src]), %%xmm1\n" \
+ shiftstr \
store " %%xmm1, 0(%[dst])\n" \
: : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1")
-#define COPY64(dstp, srcp, load, store) \
+#define COPY16(dstp, srcp, load, store) COPY16_S(dstp, srcp, load, store, "")
+
+#define COPY64_SHIFTR(x) \
+ "psrlw "x", %%xmm1\n" \
+ "psrlw "x", %%xmm2\n" \
+ "psrlw "x", %%xmm3\n" \
+ "psrlw "x", %%xmm4\n"
+#define COPY64_SHIFTL(x) \
+ "psllw "x", %%xmm1\n" \
+ "psllw "x", %%xmm2\n" \
+ "psllw "x", %%xmm3\n" \
+ "psllw "x", %%xmm4\n"
+
+#define COPY64_S(dstp, srcp, load, store, shiftstr) \
asm volatile ( \
load " 0(%[src]), %%xmm1\n" \
load " 16(%[src]), %%xmm2\n" \
load " 32(%[src]), %%xmm3\n" \
load " 48(%[src]), %%xmm4\n" \
+ shiftstr \
store " %%xmm1, 0(%[dst])\n" \
store " %%xmm2, 16(%[dst])\n" \
store " %%xmm3, 32(%[dst])\n" \
store " %%xmm4, 48(%[dst])\n" \
: : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1", "xmm2", "xmm3", "xmm4")
+#define COPY64(dstp, srcp, load, store) \
+ COPY64_S(dstp, srcp, load, store, "")
+
#ifdef COPY_TEST_NOOPTIM
# undef vlc_CPU_SSE4_1
# define vlc_CPU_SSE4_1() (0)
@@ -113,45 +140,70 @@ void CopyCleanCache(copy_cache_t *cache)
VLC_SSE
static void CopyFromUswc(uint8_t *dst, size_t dst_pitch,
const uint8_t *src, size_t src_pitch,
- unsigned width, unsigned height)
+ unsigned width, unsigned height, int bitshift)
{
assert(((intptr_t)dst & 0x0f) == 0 && (dst_pitch & 0x0f) == 0);
asm volatile ("mfence");
- for (unsigned y = 0; y < height; y++) {
- const unsigned unaligned = (-(uintptr_t)src) & 0x0f;
- unsigned x = unaligned;
-
-#ifdef CAN_COMPILE_SSE4_1
- if (vlc_CPU_SSE4_1()) {
- if (!unaligned) {
- for (; x+63 < width; x += 64)
- COPY64(&dst[x], &src[x], "movntdqa", "movdqa");
- } else {
- COPY16(dst, src, "movdqu", "movdqa");
- for (; x+63 < width; x += 64)
- COPY64(&dst[x], &src[x], "movntdqa", "movdqu");
- }
- } else
-#endif
- {
- if (!unaligned) {
- for (; x+63 < width; x += 64)
- COPY64(&dst[x], &src[x], "movdqa", "movdqa");
- } else {
- COPY16(dst, src, "movdqu", "movdqa");
- for (; x+63 < width; x += 64)
- COPY64(&dst[x], &src[x], "movdqa", "movdqu");
- }
- }
-
- for (; x < width; x++)
- dst[x] = src[x];
+#define SSE_USWC_COPY(shiftstr16, shiftstr64) \
+ for (unsigned y = 0; y < height; y++) { \
+ const unsigned unaligned = (-(uintptr_t)src) & 0x0f; \
+ unsigned x = unaligned; \
+ if (vlc_CPU_SSE4_1()) { \
+ if (!unaligned) { \
+ for (; x+63 < width; x += 64) \
+ COPY64_S(&dst[x], &src[x], "movntdqa", "movdqa", shiftstr64); \
+ } else { \
+ COPY16_S(dst, src, "movdqu", "movdqa", shiftstr16); \
+ for (; x+63 < width; x += 64) \
+ COPY64_S(&dst[x], &src[x], "movntdqa", "movdqu", shiftstr64); \
+ } \
+ } else { \
+ if (!unaligned) { \
+ for (; x+63 < width; x += 64) \
+ COPY64_S(&dst[x], &src[x], "movdqa", "movdqa", shiftstr64); \
+ } else { \
+ COPY16_S(dst, src, "movdqu", "movdqa", shiftstr16); \
+ for (; x+63 < width; x += 64) \
+ COPY64_S(&dst[x], &src[x], "movdqa", "movdqu", shiftstr64); \
+ } \
+ } \
+ /* The following should not happen since buffers are generally well aligned */ \
+ if (x < width) \
+ CopyPlane(&dst[x], dst_pitch - x, &src[x], src_pitch - x, 1, bitshift); \
+ src += src_pitch; \
+ dst += dst_pitch; \
+ }
- src += src_pitch;
- dst += dst_pitch;
+ /* The shift count is pasted into the asm text as an immediate, so every
+ * supported value gets its own expansion of the copy loop. Positive values
+ * shift right (psrlw), negative values shift left (psllw). */
+ switch (bitshift)
+ {
+ case 0:
+ SSE_USWC_COPY("", "")
+ break;
+ case -6:
+ SSE_USWC_COPY(COPY16_SHIFTL("$6"), COPY64_SHIFTL("$6"))
+ break;
+ case 6:
+ SSE_USWC_COPY(COPY16_SHIFTR("$6"), COPY64_SHIFTR("$6"))
+ break;
+ case 2:
+ SSE_USWC_COPY(COPY16_SHIFTR("$2"), COPY64_SHIFTR("$2"))
+ break;
+ case -2:
+ SSE_USWC_COPY(COPY16_SHIFTL("$2"), COPY64_SHIFTL("$2"))
+ break;
+ case 4:
+ SSE_USWC_COPY(COPY16_SHIFTR("$4"), COPY64_SHIFTR("$4"))
+ break;
+ case -4:
+ /* Left shift by 4, mirroring the "case 4" right shift. */
+ SSE_USWC_COPY(COPY16_SHIFTL("$4"), COPY64_SHIFTL("$4"))
+ break;
+ default:
+ vlc_assert_unreachable();
+ }
+#undef SSE_USWC_COPY
+
asm volatile ("mfence");
}
@@ -412,21 +464,21 @@ static void SSE_SplitUV(uint8_t *dstu, size_t dstu_pitch,
static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
const uint8_t *src, size_t src_pitch,
uint8_t *cache, size_t cache_size,
- unsigned height)
+ unsigned height, int bitshift)
{
const unsigned w16 = (src_pitch+15) & ~15;
const unsigned hstep = cache_size / w16;
assert(hstep > 0);
/* If SSE4.1: CopyFromUswc is faster than memcpy */
- if (!vlc_CPU_SSE4_1() && src_pitch == dst_pitch)
+ if (!vlc_CPU_SSE4_1() && bitshift == 0 && src_pitch == dst_pitch)
memcpy(dst, src, src_pitch * height);
else
for (unsigned y = 0; y < height; y += hstep) {
const unsigned hblock = __MIN(hstep, height - y);
/* Copy a bunch of line into our cache */
- CopyFromUswc(cache, w16, src, src_pitch, src_pitch, hblock);
+ CopyFromUswc(cache, w16, src, src_pitch, src_pitch, hblock, bitshift);
/* Copy from our cache to the destination */
Copy2d(dst, dst_pitch, cache, w16, src_pitch, hblock);
@@ -442,7 +494,7 @@ SSE_InterleavePlanes(uint8_t *dst, size_t dst_pitch,
const uint8_t *srcu, size_t srcu_pitch,
const uint8_t *srcv, size_t srcv_pitch,
uint8_t *cache, size_t cache_size,
- unsigned int height, uint8_t pixel_size)
+ unsigned int height, uint8_t pixel_size, int bitshift)
{
assert(srcu_pitch == srcv_pitch);
unsigned int const w16 = (srcu_pitch+15) & ~15;
@@ -454,9 +506,9 @@ SSE_InterleavePlanes(uint8_t *dst, size_t dst_pitch,
unsigned int const hblock = __MIN(hstep, height - y);
/* Copy a bunch of line into our cache */
- CopyFromUswc(cache, w16, srcu, srcu_pitch, srcu_pitch, hblock);
+ CopyFromUswc(cache, w16, srcu, srcu_pitch, srcu_pitch, hblock, bitshift);
CopyFromUswc(cache+w16*hblock, w16, srcv, srcv_pitch,
- srcv_pitch, hblock);
+ srcv_pitch, hblock, bitshift);
/* Copy from our cache to the destination */
SSE_InterleaveUV(dst, dst_pitch, cache, w16,
@@ -474,7 +526,7 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
uint8_t *dstv, size_t dstv_pitch,
const uint8_t *src, size_t src_pitch,
uint8_t *cache, size_t cache_size,
- unsigned height, uint8_t pixel_size)
+ unsigned height, uint8_t pixel_size, int bitshift)
{
const unsigned w16 = (src_pitch+15) & ~15;
const unsigned hstep = cache_size / w16;
@@ -484,7 +536,7 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
const unsigned hblock = __MIN(hstep, height - y);
/* Copy a bunch of line into our cache */
- CopyFromUswc(cache, w16, src, src_pitch, src_pitch, hblock);
+ CopyFromUswc(cache, w16, src, src_pitch, src_pitch, hblock, bitshift);
/* Copy from our cache to the destination */
SSE_SplitUV(dstu, dstu_pitch, dstv, dstv_pitch,
@@ -506,7 +558,7 @@ static void SSE_Copy420_P_to_P(picture_t *dst, const uint8_t *src[static 3],
SSE_CopyPlane(dst->p[n].p_pixels, dst->p[n].i_pitch,
src[n], src_pitch[n],
cache->buffer, cache->size,
- (height+d-1)/d);
+ (height+d-1)/d, 0);
}
asm volatile ("emms");
}
@@ -517,37 +569,38 @@ static void SSE_Copy420_SP_to_SP(picture_t *dst, const uint8_t *src[static 2],
const copy_cache_t *cache)
{
SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch, src[0], src_pitch[0],
- cache->buffer, cache->size, height);
+ cache->buffer, cache->size, height, 0);
SSE_CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch, src[1], src_pitch[1],
- cache->buffer, cache->size, height / 2);
+ cache->buffer, cache->size, height / 2, 0);
asm volatile ("emms");
}
static void
SSE_Copy420_SP_to_P(picture_t *dest, const uint8_t *src[static 2],
const size_t src_pitch[static 2], unsigned int height,
- const copy_cache_t *cache, uint8_t pixel_size)
+ uint8_t pixel_size, int bitshift, const copy_cache_t *cache)
{
SSE_CopyPlane(dest->p[0].p_pixels, dest->p[0].i_pitch,
- src[0], src_pitch[0], cache->buffer, cache->size, height);
+ src[0], src_pitch[0], cache->buffer, cache->size, height, bitshift);
+
SSE_SplitPlanes(dest->p[1].p_pixels, dest->p[1].i_pitch,
dest->p[2].p_pixels, dest->p[2].i_pitch,
src[1], src_pitch[1], cache->buffer, cache->size,
- height / 2, pixel_size);
+ height / 2, pixel_size, bitshift);
asm volatile ("emms");
}
static void SSE_Copy420_P_to_SP(picture_t *dst, const uint8_t *src[static 3],
const size_t src_pitch[static 3],
- unsigned height, const copy_cache_t *cache,
- uint8_t pixel_size)
+ unsigned height, uint8_t pixel_size,
+ int bitshift, const copy_cache_t *cache)
{
SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch, src[0], src_pitch[0],
- cache->buffer, cache->size, height);
+ cache->buffer, cache->size, height, bitshift);
SSE_InterleavePlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
src[U_PLANE], src_pitch[U_PLANE],
src[V_PLANE], src_pitch[V_PLANE],
- cache->buffer, cache->size, height / 2, pixel_size);
+ cache->buffer, cache->size, height / 2, pixel_size, bitshift);
asm volatile ("emms");
}
#undef COPY64
@@ -555,9 +608,26 @@ static void SSE_Copy420_P_to_SP(picture_t *dst, const uint8_t *src[static 3],
static void CopyPlane(uint8_t *dst, size_t dst_pitch,
const uint8_t *src, size_t src_pitch,
- unsigned height)
+ unsigned height, int bitshift)
{
- if (src_pitch == dst_pitch)
+ if (bitshift != 0)
+ {
+ for (unsigned y = 0; y < height; y++)
+ {
+ uint16_t *dst16 = (uint16_t *) dst;
+ const uint16_t *src16 = (const uint16_t *) src;
+
+ if (bitshift > 0)
+ for (unsigned x = 0; x < (src_pitch / 2); x++)
+ *dst16++ = (*src16++) >> (bitshift & 0xf);
+ else
+ for (unsigned x = 0; x < (src_pitch / 2); x++)
+ *dst16++ = (*src16++) << ((-bitshift) & 0xf);
+ src += src_pitch;
+ dst += dst_pitch;
+ }
+ }
+ else if (src_pitch == dst_pitch)
memcpy(dst, src, src_pitch * height);
else
for (unsigned y = 0; y < height; y++) {
@@ -580,9 +650,9 @@ void Copy420_SP_to_SP(picture_t *dst, const uint8_t *src[static 2],
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
- src[0], src_pitch[0], height);
+ src[0], src_pitch[0], height, 0);
CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
- src[1], src_pitch[1], height/2);
+ src[1], src_pitch[1], height/2, 0);
}
#define SPLIT_PLANES(type, pitch_den) do { \
@@ -597,6 +667,30 @@ void Copy420_SP_to_SP(picture_t *dst, const uint8_t *src[static 2],
} \
} while(0)
+#define SPLIT_PLANES_SHIFTR(type, pitch_den, bitshift) do { \
+ for (unsigned y = 0; y < height; y++) { \
+ for (unsigned x = 0; x < src_pitch / pitch_den; x++) { \
+ ((type *) dstu)[x] = (((const type *) src)[2*x+0]) >> (bitshift); \
+ ((type *) dstv)[x] = (((const type *) src)[2*x+1]) >> (bitshift); \
+ } \
+ src += src_pitch; \
+ dstu += dstu_pitch; \
+ dstv += dstv_pitch; \
+ } \
+} while(0)
+
+#define SPLIT_PLANES_SHIFTL(type, pitch_den, bitshift) do { \
+ for (unsigned y = 0; y < height; y++) { \
+ for (unsigned x = 0; x < src_pitch / pitch_den; x++) { \
+ ((type *) dstu)[x] = (((const type *) src)[2*x+0]) << (bitshift); \
+ ((type *) dstv)[x] = (((const type *) src)[2*x+1]) << (bitshift); \
+ } \
+ src += src_pitch; \
+ dstu += dstu_pitch; \
+ dstv += dstv_pitch; \
+ } \
+} while(0)
+
static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
uint8_t *dstv, size_t dstv_pitch,
const uint8_t *src, size_t src_pitch, unsigned height)
@@ -606,9 +700,15 @@ static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
static void SplitPlanes16(uint8_t *dstu, size_t dstu_pitch,
uint8_t *dstv, size_t dstv_pitch,
- const uint8_t *src, size_t src_pitch, unsigned height)
+ const uint8_t *src, size_t src_pitch, unsigned height,
+ int bitshift)
{
- SPLIT_PLANES(uint16_t, 4);
+ if (bitshift == 0)
+ SPLIT_PLANES(uint16_t, 4);
+ else if (bitshift > 0)
+ SPLIT_PLANES_SHIFTR(uint16_t, 4, bitshift & 0xf);
+ else
+ SPLIT_PLANES_SHIFTL(uint16_t, 4, (-bitshift) & 0xf);
}
void Copy420_SP_to_P(picture_t *dst, const uint8_t *src[static 2],
@@ -618,13 +718,13 @@ void Copy420_SP_to_P(picture_t *dst, const uint8_t *src[static 2],
ASSERT_2PLANES;
#ifdef CAN_COMPILE_SSE2
if (vlc_CPU_SSE2())
- return SSE_Copy420_SP_to_P(dst, src, src_pitch, height, cache, 1);
+ return SSE_Copy420_SP_to_P(dst, src, src_pitch, height, 1, 0, cache);
#else
VLC_UNUSED(cache);
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
- src[0], src_pitch[0], height);
+ src[0], src_pitch[0], height, 0);
SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
dst->p[2].p_pixels, dst->p[2].i_pitch,
src[1], src_pitch[1], height/2);
@@ -632,21 +732,23 @@ void Copy420_SP_to_P(picture_t *dst, const uint8_t *src[static 2],
void Copy420_16_SP_to_P(picture_t *dst, const uint8_t *src[static 2],
const size_t src_pitch[static 2], unsigned height,
- const copy_cache_t *cache)
+ int bitshift, const copy_cache_t *cache)
{
ASSERT_2PLANES;
+ assert(bitshift >= -6 && bitshift <= 6 && (bitshift % 2 == 0));
+
#ifdef CAN_COMPILE_SSE3
if (vlc_CPU_SSSE3())
- return SSE_Copy420_SP_to_P(dst, src, src_pitch, height, cache, 2);
+ return SSE_Copy420_SP_to_P(dst, src, src_pitch, height, 2, bitshift, cache);
#else
VLC_UNUSED(cache);
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
- src[0], src_pitch[0], height);
+ src[0], src_pitch[0], height, bitshift);
SplitPlanes16(dst->p[1].p_pixels, dst->p[1].i_pitch,
dst->p[2].p_pixels, dst->p[2].i_pitch,
- src[1], src_pitch[1], height/2);
+ src[1], src_pitch[1], height/2, bitshift);
}
#define INTERLEAVE_UV() do { \
@@ -661,6 +763,30 @@ void Copy420_16_SP_to_P(picture_t *dst, const uint8_t *src[static 2],
} \
}while(0)
+/* Interleave srcU/srcV samples into dstUV, shifting each sample right by
+ * "bitshift" bits. Uses copy_lines, copy_pitch and the i_extra_pitch_*
+ * variables of the expanding function. */
+#define INTERLEAVE_UV_SHIFTR(bitshift) do { \
+ for ( unsigned int line = 0; line < copy_lines; line++ ) { \
+ for ( unsigned int col = 0; col < copy_pitch; col++ ) { \
+ *dstUV++ = (*srcU++) >> (bitshift); \
+ *dstUV++ = (*srcV++) >> (bitshift); \
+ } \
+ dstUV += i_extra_pitch_uv; \
+ srcU += i_extra_pitch_u; \
+ srcV += i_extra_pitch_v; \
+ } \
+}while(0)
+
+/* Interleave srcU/srcV samples into dstUV, shifting each sample left by
+ * "bitshift" bits. Uses copy_lines, copy_pitch and the i_extra_pitch_*
+ * variables of the expanding function. */
+#define INTERLEAVE_UV_SHIFTL(bitshift) do { \
+ for ( unsigned int line = 0; line < copy_lines; line++ ) { \
+ for ( unsigned int col = 0; col < copy_pitch; col++ ) { \
+ *dstUV++ = (*srcU++) << (bitshift); \
+ *dstUV++ = (*srcV++) << (bitshift); \
+ } \
+ dstUV += i_extra_pitch_uv; \
+ srcU += i_extra_pitch_u; \
+ srcV += i_extra_pitch_v; \
+ } \
+}while(0)
+
void Copy420_P_to_SP(picture_t *dst, const uint8_t *src[static 3],
const size_t src_pitch[static 3], unsigned height,
const copy_cache_t *cache)
@@ -668,13 +794,13 @@ void Copy420_P_to_SP(picture_t *dst, const uint8_t *src[static 3],
ASSERT_3PLANES;
#ifdef CAN_COMPILE_SSE2
if (vlc_CPU_SSE2())
- return SSE_Copy420_P_to_SP(dst, src, src_pitch, height, cache, 1);
+ return SSE_Copy420_P_to_SP(dst, src, src_pitch, height, 1, 0, cache);
#else
(void) cache;
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
- src[0], src_pitch[0], height);
+ src[0], src_pitch[0], height, 0);
const unsigned copy_lines = height / 2;
const unsigned copy_pitch = src_pitch[1];
@@ -691,18 +817,19 @@ void Copy420_P_to_SP(picture_t *dst, const uint8_t *src[static 3],
void Copy420_16_P_to_SP(picture_t *dst, const uint8_t *src[static 3],
const size_t src_pitch[static 3], unsigned height,
- const copy_cache_t *cache)
+ int bitshift, const copy_cache_t *cache)
{
ASSERT_3PLANES;
+ assert(bitshift >= -6 && bitshift <= 6 && (bitshift % 2 == 0));
#ifdef CAN_COMPILE_SSE2
if (vlc_CPU_SSSE3())
- return SSE_Copy420_P_to_SP(dst, src, src_pitch, height, cache, 2);
+ return SSE_Copy420_P_to_SP(dst, src, src_pitch, height, 2, bitshift, cache);
#else
(void) cache;
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
- src[0], src_pitch[0], height);
+ src[0], src_pitch[0], height, bitshift);
const unsigned copy_lines = height / 2;
const unsigned copy_pitch = src_pitch[1] / 2;
@@ -714,7 +841,13 @@ void Copy420_16_P_to_SP(picture_t *dst, const uint8_t *src[static 3],
uint16_t *dstUV = (void*) dst->p[1].p_pixels;
const uint16_t *srcU = (const uint16_t *) src[U_PLANE];
const uint16_t *srcV = (const uint16_t *) src[V_PLANE];
- INTERLEAVE_UV();
+
+ if (bitshift == 0)
+ INTERLEAVE_UV();
+ else if (bitshift > 0)
+ INTERLEAVE_UV_SHIFTR(bitshift & 0xf);
+ else
+ INTERLEAVE_UV_SHIFTL((-bitshift) & 0xf);
}
void CopyFromI420_10ToP010(picture_t *dst, const uint8_t *src[static 3],
@@ -771,11 +904,11 @@ void Copy420_P_to_P(picture_t *dst, const uint8_t *src[static 3],
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
- src[0], src_pitch[0], height);
+ src[0], src_pitch[0], height, 0);
CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
- src[1], src_pitch[1], height / 2);
+ src[1], src_pitch[1], height / 2, 0);
CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
- src[2], src_pitch[2], height / 2);
+ src[2], src_pitch[2], height / 2, 0);
}
int picture_UpdatePlanes(picture_t *picture, uint8_t *data, unsigned pitch)
@@ -837,8 +970,14 @@ int picture_UpdatePlanes(picture_t *picture, uint8_t *data, unsigned pitch)
struct test_dst
{
vlc_fourcc_t chroma;
- void (*conv)(picture_t *, const uint8_t *[], const size_t [], unsigned,
- const copy_cache_t *);
+ int bitshift;
+ union
+ {
+ void (*conv)(picture_t *, const uint8_t *[], const size_t [], unsigned,
+ const copy_cache_t *);
+ void (*conv16)(picture_t *, const uint8_t *[], const size_t [], unsigned, int,
+ const copy_cache_t *);
+ };
};
struct test_conv
@@ -849,18 +988,18 @@ struct test_conv
static const struct test_conv convs[] = {
{ .src_chroma = VLC_CODEC_NV12,
- .dsts = { { VLC_CODEC_I420, Copy420_SP_to_P },
- { VLC_CODEC_NV12, Copy420_SP_to_SP } },
+ .dsts = { { VLC_CODEC_I420, 0, .conv = Copy420_SP_to_P },
+ { VLC_CODEC_NV12, 0, .conv = Copy420_SP_to_SP } },
},
{ .src_chroma = VLC_CODEC_I420,
- .dsts = { { VLC_CODEC_I420, Copy420_P_to_P },
- { VLC_CODEC_NV12, Copy420_P_to_SP } },
+ .dsts = { { VLC_CODEC_I420, 0, .conv = Copy420_P_to_P },
+ { VLC_CODEC_NV12, 0, .conv = Copy420_P_to_SP } },
},
{ .src_chroma = VLC_CODEC_P010,
- .dsts = { { VLC_CODEC_I420_10B, Copy420_16_SP_to_P } },
+ .dsts = { { VLC_CODEC_I420_10L, 6, .conv16 = Copy420_16_SP_to_P } },
},
- { .src_chroma = VLC_CODEC_I420_10B,
- .dsts = { { VLC_CODEC_P010, Copy420_16_P_to_SP } },
+ { .src_chroma = VLC_CODEC_I420_10L,
+ .dsts = { { VLC_CODEC_P010, -6, .conv16 = Copy420_16_P_to_SP } },
},
};
#define NB_CONVS ARRAY_SIZE(convs)
@@ -889,8 +1028,8 @@ static const struct test_size sizes[] = {
static void piccheck(picture_t *pic, const vlc_chroma_description_t *dsc,
bool init)
{
-#define ASSERT_COLOR() do { \
- fprintf(stderr, "error: pixel doesn't match @ plane: %d: %d x %d: %X\n", i, x, y, *(--p)); \
+#define ASSERT_COLOR(good) do { \
+ fprintf(stderr, "error: pixel doesn't match @ plane: %d: %d x %d: 0x%X vs 0x%X\n", i, x, y, *(--p), good); \
assert(!"error: pixel doesn't match"); \
} while(0)
@@ -907,7 +1046,7 @@ static void piccheck(picture_t *pic, const vlc_chroma_description_t *dsc,
if (init) \
*(p++) = color_UV; \
else if (*(p++) != color_UV) \
- ASSERT_COLOR(); \
+ ASSERT_COLOR(color_UV); \
} \
else \
{ \
@@ -916,24 +1055,42 @@ static void piccheck(picture_t *pic, const vlc_chroma_description_t *dsc,
if (init) \
*(p++) = colors_P[i]; \
else if (*(p++) != colors_P[i]) \
- ASSERT_COLOR(); \
+ ASSERT_COLOR(colors_P[i]); \
} \
} \
} \
} while (0)
assert(pic->i_planes == 2 || pic->i_planes == 3);
- const uint8_t colors_8_P[3] = { 0x42, 0xF1, 0x36 };
- const uint16_t color_8_UV = ntoh16(0xF136);
-
- const uint16_t colors_16_P[3] = { ntoh16(0x1042), ntoh16(0xF114), ntoh16(0x3645) };
- const uint32_t color_16_UV = ntoh32(0xF1143645);
-
assert(dsc->pixel_size == 1 || dsc->pixel_size == 2);
+
if (dsc->pixel_size == 1)
+ {
+ const uint8_t colors_8_P[3] = { 0x42, 0xF1, 0x36 };
+ const uint16_t color_8_UV = ntoh16(0xF136);
PICCHECK(uint8_t, uint16_t, colors_8_P, color_8_UV, 1);
+ }
else
+ {
+ /* Keep only the significant bits of each 16-bit reference colour. */
+ const unsigned mask = (1 << dsc->pixel_bits) - 1;
+ uint16_t colors_16_P[3] = { 0x1042 &mask, 0xF114 &mask, 0x3645 &mask};
+
+ switch (pic->format.i_chroma)
+ {
+ case VLC_CODEC_P010:
+ /* Reference values for P010 are stored shifted left by 6 bits. */
+ for (size_t i = 0; i < 3; ++i)
+ colors_16_P[i] <<= 6;
+ break;
+ case VLC_CODEC_I420_10L:
+ break;
+ default:
+ vlc_assert_unreachable();
+ }
+
+ /* Widen before shifting: a uint16_t promotes to int, and a value such as
+ * 0x9140 shifted left by 16 does not fit in a 32-bit signed int. */
+ uint32_t color_16_UV = ((uint32_t) colors_16_P[2] << 16) | colors_16_P[1];
+
+ PICCHECK(uint16_t, uint32_t, colors_16_P, color_16_UV, 2);
+ }
}
static void pic_rsc_destroy(picture_t *pic)
@@ -1022,8 +1179,13 @@ int main(void)
size->i_visible_width, size->i_visible_height,
(const char *) &src->format.i_chroma,
(const char *) &dst->format.i_chroma);
- test_dst->conv(dst, src_planes, src_pitches,
- src->format.i_visible_height, &cache);
+ if (test_dst->bitshift == 0)
+ test_dst->conv(dst, src_planes, src_pitches,
+ src->format.i_visible_height, &cache);
+ else
+ test_dst->conv16(dst, src_planes, src_pitches,
+ src->format.i_visible_height, test_dst->bitshift,
+ &cache);
piccheck(dst, dst_dsc, false);
picture_Release(dst);
}
diff --git a/modules/video_chroma/copy.h b/modules/video_chroma/copy.h
index 296d1a6734..b2ced779c8 100644
--- a/modules/video_chroma/copy.h
+++ b/modules/video_chroma/copy.h
@@ -56,13 +56,17 @@ void Copy420_SP_to_P(picture_t *dst, const uint8_t *src[static 2],
const size_t src_pitch[static 2], unsigned height,
const copy_cache_t *cache);
+/* Copy planes from I420_10 to P010. A positive bitshift value will shift bits
+ * to the right, a negative value will shift to the left. */
void Copy420_16_P_to_SP(picture_t *dst, const uint8_t *src[static 3],
const size_t src_pitch[static 3], unsigned height,
- const copy_cache_t *cache);
+ int bitshift, const copy_cache_t *cache);
+/* Copy planes from P010 to I420_10. A positive bitshift value will shift bits
+ * to the right, a negative value will shift to the left. */
void Copy420_16_SP_to_P(picture_t *dst, const uint8_t *src[static 2],
const size_t src_pitch[static 2], unsigned height,
- const copy_cache_t *cache);
+ int bitshift, const copy_cache_t *cache);
/* XXX: Not optimized copy (no SEE) */
void CopyFromI420_10ToP010(picture_t *dst, const uint8_t *src[static 3],
More information about the vlc-commits
mailing list