[x265] [PATCH] DCT : C code optimization as per new interface
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Oct 28 13:08:41 CET 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1414490284 -19800
# Node ID b40f43de0ead72272c25f2e61db092617f65f1b0
# Parent 06197d720e4fbee696e513a96346dd67739fe80a
DCT : C code optimization as per new interface
diff -r 06197d720e4f -r b40f43de0ead source/common/dct.cpp
--- a/source/common/dct.cpp Tue Oct 28 15:18:25 2014 +0530
+++ b/source/common/dct.cpp Tue Oct 28 15:28:04 2014 +0530
@@ -459,113 +459,50 @@
void dct4_c(int16_t *src, int16_t *dst, intptr_t stride)
{
+ stride; // To eliminate warnings and match the interface with asm code.
const int shift_1st = 1 + X265_DEPTH - 8;
const int shift_2nd = 8;
ALIGN_VAR_32(int16_t, coef[4 * 4]);
- ALIGN_VAR_32(int16_t, block[4 * 4]);
- for (int i = 0; i < 4; i++)
- {
- memcpy(&block[i * 4], &src[i * stride], 4 * sizeof(int16_t));
- }
-
- partialButterfly4(block, coef, shift_1st, 4);
- partialButterfly4(coef, block, shift_2nd, 4);
-#define N (4)
- for (int i = 0; i < N; i++)
- {
- for (int j = 0; j < N; j++)
- {
- dst[i * N + j] = block[i * N + j];
- }
- }
-
-#undef N
+ partialButterfly4(src, coef, shift_1st, 4);
+ partialButterfly4(coef, dst, shift_2nd, 4);
}
void dct8_c(int16_t *src, int16_t *dst, intptr_t stride)
{
+ stride; // To eliminate warnings and match the interface with asm code.
const int shift_1st = 2 + X265_DEPTH - 8;
const int shift_2nd = 9;
ALIGN_VAR_32(int16_t, coef[8 * 8]);
- ALIGN_VAR_32(int16_t, block[8 * 8]);
- for (int i = 0; i < 8; i++)
- {
- memcpy(&block[i * 8], &src[i * stride], 8 * sizeof(int16_t));
- }
-
- partialButterfly8(block, coef, shift_1st, 8);
- partialButterfly8(coef, block, shift_2nd, 8);
-
-#define N (8)
- for (int i = 0; i < N; i++)
- {
- for (int j = 0; j < N; j++)
- {
- dst[i * N + j] = block[i * N + j];
- }
- }
-
-#undef N
+ partialButterfly8(src, coef, shift_1st, 8);
+ partialButterfly8(coef, dst, shift_2nd, 8);
}
void dct16_c(int16_t *src, int16_t *dst, intptr_t stride)
{
+ stride; // To eliminate warnings and match the interface with asm code.
const int shift_1st = 3 + X265_DEPTH - 8;
const int shift_2nd = 10;
ALIGN_VAR_32(int16_t, coef[16 * 16]);
- ALIGN_VAR_32(int16_t, block[16 * 16]);
- for (int i = 0; i < 16; i++)
- {
- memcpy(&block[i * 16], &src[i * stride], 16 * sizeof(int16_t));
- }
-
- partialButterfly16(block, coef, shift_1st, 16);
- partialButterfly16(coef, block, shift_2nd, 16);
-
-#define N (16)
- for (int i = 0; i < N; i++)
- {
- for (int j = 0; j < N; j++)
- {
- dst[i * N + j] = block[i * N + j];
- }
- }
-
-#undef N
+ partialButterfly16(src, coef, shift_1st, 16);
+ partialButterfly16(coef, dst, shift_2nd, 16);
}
void dct32_c(int16_t *src, int16_t *dst, intptr_t stride)
{
+ stride; // To eliminate warnings and match the interface with asm code.
const int shift_1st = 4 + X265_DEPTH - 8;
const int shift_2nd = 11;
ALIGN_VAR_32(int16_t, coef[32 * 32]);
- ALIGN_VAR_32(int16_t, block[32 * 32]);
- for (int i = 0; i < 32; i++)
- {
- memcpy(&block[i * 32], &src[i * stride], 32 * sizeof(int16_t));
- }
-
- partialButterfly32(block, coef, shift_1st, 32);
- partialButterfly32(coef, block, shift_2nd, 32);
-
-#define N (32)
- for (int i = 0; i < N; i++)
- {
- for (int j = 0; j < N; j++)
- {
- dst[i * N + j] = block[i * N + j];
- }
- }
-
-#undef N
+ partialButterfly32(src, coef, shift_1st, 32);
+ partialButterfly32(coef, dst, shift_2nd, 32);
}
void idst4_c(int16_t *src, int16_t *dst, intptr_t stride)
More information about the x265-devel mailing list