[x265] [PATCH] Fix for C code mismatch
Steve Borho
steve at borho.org
Thu Nov 20 17:44:43 CET 2014
On 11/20, praveen at multicorewareinc.com wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1416494042 -19800
> # Node ID b2b6021639875ebe7398d7725592206c1f33e613
> # Parent ed587d360b97624d8dc5b738d08029ffe4f8d92d
> Fix for C code mismatch
>
> This patch fixes the binary mismatch in encoded output introduced during
> refactoring of the transform/quant path. Basically it is the original version of
> the code, ensuring all valid inputs are copied into the input buffer. On the
> other hand, it is not fully optimized code, but this patch is a quick fix for the
> problem and allows us to optimize one function at a time.
queued with typos fixed
> diff -r ed587d360b97 -r b2b602163987 source/common/dct.cpp
> --- a/source/common/dct.cpp Thu Nov 20 18:25:09 2014 +0900
> +++ b/source/common/dct.cpp Thu Nov 20 20:04:02 2014 +0530
> @@ -454,51 +454,129 @@
> }
>
> fastForwardDst(block, coef, shift_1st);
> - fastForwardDst(coef, dst, shift_2nd);
> + fastForwardDst(coef, block, shift_2nd);
> +
> +#define N (4)
> + for (int i = 0; i < N; i++)
> + {
> + for (int j = 0; j < N; j++)
> + {
> + dst[i * N + j] = block[i * N + j];
> + }
> + }
> +
> +#undef N
> }
>
> -void dct4_c(const int16_t *src, int16_t *dst, intptr_t /* stride */)
> +void dct4_c(const int16_t *src, int16_t *dst, intptr_t stride)
> {
> const int shift_1st = 1 + X265_DEPTH - 8;
> const int shift_2nd = 8;
>
> ALIGN_VAR_32(int16_t, coef[4 * 4]);
> + ALIGN_VAR_32(int16_t, block[4 * 4]);
>
> - partialButterfly4(src, coef, shift_1st, 4);
> - partialButterfly4(coef, dst, shift_2nd, 4);
> + for (int i = 0; i < 4; i++)
> + {
> + memcpy(&block[i * 4], &src[i * stride], 4 * sizeof(int16_t));
> + }
> +
> + partialButterfly4(block, coef, shift_1st, 4);
> + partialButterfly4(coef, block, shift_2nd, 4);
> +#define N (4)
> + for (int i = 0; i < N; i++)
> + {
> + for (int j = 0; j < N; j++)
> + {
> + dst[i * N + j] = block[i * N + j];
> + }
> + }
> +
> +#undef N
> }
>
> -void dct8_c(const int16_t *src, int16_t *dst, intptr_t /* stride */)
> +void dct8_c(const int16_t *src, int16_t *dst, intptr_t stride)
> {
> const int shift_1st = 2 + X265_DEPTH - 8;
> const int shift_2nd = 9;
>
> ALIGN_VAR_32(int16_t, coef[8 * 8]);
> + ALIGN_VAR_32(int16_t, block[8 * 8]);
>
> - partialButterfly8(src, coef, shift_1st, 8);
> - partialButterfly8(coef, dst, shift_2nd, 8);
> + for (int i = 0; i < 8; i++)
> + {
> + memcpy(&block[i * 8], &src[i * stride], 8 * sizeof(int16_t));
> + }
> +
> + partialButterfly8(block, coef, shift_1st, 8);
> + partialButterfly8(coef, block, shift_2nd, 8);
> +
> +#define N (8)
> + for (int i = 0; i < N; i++)
> + {
> + for (int j = 0; j < N; j++)
> + {
> + dst[i * N + j] = block[i * N + j];
> + }
> + }
> +
> +#undef N
> }
>
> -void dct16_c(const int16_t *src, int16_t *dst, intptr_t /* stride */)
> +void dct16_c(const int16_t *src, int16_t *dst, intptr_t stride)
> {
> const int shift_1st = 3 + X265_DEPTH - 8;
> const int shift_2nd = 10;
>
> ALIGN_VAR_32(int16_t, coef[16 * 16]);
> + ALIGN_VAR_32(int16_t, block[16 * 16]);
>
> - partialButterfly16(src, coef, shift_1st, 16);
> - partialButterfly16(coef, dst, shift_2nd, 16);
> + for (int i = 0; i < 16; i++)
> + {
> + memcpy(&block[i * 16], &src[i * stride], 16 * sizeof(int16_t));
> + }
> +
> + partialButterfly16(block, coef, shift_1st, 16);
> + partialButterfly16(coef, block, shift_2nd, 16);
> +
> +#define N (16)
> + for (int i = 0; i < N; i++)
> + {
> + for (int j = 0; j < N; j++)
> + {
> + dst[i * N + j] = block[i * N + j];
> + }
> + }
> +
> +#undef N
> }
>
> -void dct32_c(const int16_t *src, int16_t *dst, intptr_t /* stride */)
> +void dct32_c(const int16_t *src, int16_t *dst, intptr_t stride)
> {
> const int shift_1st = 4 + X265_DEPTH - 8;
> const int shift_2nd = 11;
>
> ALIGN_VAR_32(int16_t, coef[32 * 32]);
> + ALIGN_VAR_32(int16_t, block[32 * 32]);
>
> - partialButterfly32(src, coef, shift_1st, 32);
> - partialButterfly32(coef, dst, shift_2nd, 32);
> + for (int i = 0; i < 32; i++)
> + {
> + memcpy(&block[i * 32], &src[i * stride], 32 * sizeof(int16_t));
> + }
> +
> + partialButterfly32(block, coef, shift_1st, 32);
> + partialButterfly32(coef, block, shift_2nd, 32);
> +
> +#define N (32)
> + for (int i = 0; i < N; i++)
> + {
> + for (int j = 0; j < N; j++)
> + {
> + dst[i * N + j] = block[i * N + j];
> + }
> + }
> +
> +#undef N
> }
>
> void idst4_c(const int16_t *src, int16_t *dst, intptr_t stride)
> @@ -509,7 +587,18 @@
> ALIGN_VAR_32(int16_t, coef[4 * 4]);
> ALIGN_VAR_32(int16_t, block[4 * 4]);
>
> - inversedst(src, coef, shift_1st); // Forward DST BY FAST ALGORITHM, block input, coef output
> +#define N (4)
> + for (int i = 0; i < N; i++)
> + {
> + for (int j = 0; j < N; j++)
> + {
> + block[i * N + j] = (int16_t)src[i * N + j];
> + }
> + }
> +
> +#undef N
> +
> + inversedst(block, coef, shift_1st); // Forward DST BY FAST ALGORITHM, block input, coef output
> inversedst(coef, block, shift_2nd); // Forward DST BY FAST ALGORITHM, coef input, coeff output
>
> for (int i = 0; i < 4; i++)
> @@ -526,7 +615,18 @@
> ALIGN_VAR_32(int16_t, coef[4 * 4]);
> ALIGN_VAR_32(int16_t, block[4 * 4]);
>
> - partialButterflyInverse4(src, coef, shift_1st, 4); // Forward DST BY FAST ALGORITHM, block input, coef output
> +#define N (4)
> + for (int i = 0; i < N; i++)
> + {
> + for (int j = 0; j < N; j++)
> + {
> + block[i * N + j] = (int16_t)src[i * N + j];
> + }
> + }
> +
> +#undef N
> +
> + partialButterflyInverse4(block, coef, shift_1st, 4); // Forward DST BY FAST ALGORITHM, block input, coef output
> partialButterflyInverse4(coef, block, shift_2nd, 4); // Forward DST BY FAST ALGORITHM, coef input, coeff output
>
> for (int i = 0; i < 4; i++)
> @@ -543,7 +643,18 @@
> ALIGN_VAR_32(int16_t, coef[8 * 8]);
> ALIGN_VAR_32(int16_t, block[8 * 8]);
>
> - partialButterflyInverse8(src, coef, shift_1st, 8);
> +#define N (8)
> + for (int i = 0; i < N; i++)
> + {
> + for (int j = 0; j < N; j++)
> + {
> + block[i * N + j] = (int16_t)src[i * N + j];
> + }
> + }
> +
> +#undef N
> +
> + partialButterflyInverse8(block, coef, shift_1st, 8);
> partialButterflyInverse8(coef, block, shift_2nd, 8);
> for (int i = 0; i < 8; i++)
> {
> @@ -559,7 +670,18 @@
> ALIGN_VAR_32(int16_t, coef[16 * 16]);
> ALIGN_VAR_32(int16_t, block[16 * 16]);
>
> - partialButterflyInverse16(src, coef, shift_1st, 16);
> +#define N (16)
> + for (int i = 0; i < N; i++)
> + {
> + for (int j = 0; j < N; j++)
> + {
> + block[i * N + j] = (int16_t)src[i * N + j];
> + }
> + }
> +
> +#undef N
> +
> + partialButterflyInverse16(block, coef, shift_1st, 16);
> partialButterflyInverse16(coef, block, shift_2nd, 16);
> for (int i = 0; i < 16; i++)
> {
> @@ -575,7 +697,18 @@
> ALIGN_VAR_32(int16_t, coef[32 * 32]);
> ALIGN_VAR_32(int16_t, block[32 * 32]);
>
> - partialButterflyInverse32(src, coef, shift_1st, 32);
> +#define N (32)
> + for (int i = 0; i < N; i++)
> + {
> + for (int j = 0; j < N; j++)
> + {
> + block[i * N + j] = (int16_t)src[i * N + j];
> + }
> + }
> +
> +#undef N
> +
> + partialButterflyInverse32(block, coef, shift_1st, 32);
> partialButterflyInverse32(coef, block, shift_2nd, 32);
>
> for (int i = 0; i < 32; i++)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list