[x265] [PATCH] DCT : C code optimization as per new interface

praveen at multicorewareinc.com praveen at multicorewareinc.com
Tue Oct 28 13:08:41 CET 2014


# HG changeset patch
# User Praveen Tiwari
# Date 1414490284 -19800
# Node ID b40f43de0ead72272c25f2e61db092617f65f1b0
# Parent  06197d720e4fbee696e513a96346dd67739fe80a
DCT : C code optimization as per new interface

diff -r 06197d720e4f -r b40f43de0ead source/common/dct.cpp
--- a/source/common/dct.cpp	Tue Oct 28 15:18:25 2014 +0530
+++ b/source/common/dct.cpp	Tue Oct 28 15:28:04 2014 +0530
@@ -459,113 +459,50 @@
 
 void dct4_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
+    stride; // Unused by the C version: this no-op reference only keeps compilers from warning about an unused parameter, and the parameter stays so the signature matches the asm code. NOTE(review): src now takes the place of the old contiguous 4x4 copy - confirm callers pass a contiguous block; a bare expression statement may itself draw a "statement has no effect" warning on some compilers.
     const int shift_1st = 1 + X265_DEPTH - 8;
     const int shift_2nd = 8;
 
     ALIGN_VAR_32(int16_t, coef[4 * 4]);
-    ALIGN_VAR_32(int16_t, block[4 * 4]);
 
-    for (int i = 0; i < 4; i++)
-    {
-        memcpy(&block[i * 4], &src[i * stride], 4 * sizeof(int16_t));
-    }
-
-    partialButterfly4(block, coef, shift_1st, 4);
-    partialButterfly4(coef, block, shift_2nd, 4);
-#define N (4)
-    for (int i = 0; i < N; i++)
-    {
-        for (int j = 0; j < N; j++)
-        {
-            dst[i * N + j] = block[i * N + j];
-        }
-    }
-
-#undef N
+    partialButterfly4(src, coef, shift_1st, 4);
+    partialButterfly4(coef, dst, shift_2nd, 4);
 }
 
 void dct8_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
+    stride; // Unused by the C version: this no-op reference only keeps compilers from warning about an unused parameter, and the parameter stays so the signature matches the asm code. NOTE(review): src now takes the place of the old contiguous 8x8 copy - confirm callers pass a contiguous block; a bare expression statement may itself draw a "statement has no effect" warning on some compilers.
     const int shift_1st = 2 + X265_DEPTH - 8;
     const int shift_2nd = 9;
 
     ALIGN_VAR_32(int16_t, coef[8 * 8]);
-    ALIGN_VAR_32(int16_t, block[8 * 8]);
 
-    for (int i = 0; i < 8; i++)
-    {
-        memcpy(&block[i * 8], &src[i * stride], 8 * sizeof(int16_t));
-    }
-
-    partialButterfly8(block, coef, shift_1st, 8);
-    partialButterfly8(coef, block, shift_2nd, 8);
-
-#define N (8)
-    for (int i = 0; i < N; i++)
-    {
-        for (int j = 0; j < N; j++)
-        {
-            dst[i * N + j] = block[i * N + j];
-        }
-    }
-
-#undef N
+    partialButterfly8(src, coef, shift_1st, 8);
+    partialButterfly8(coef, dst, shift_2nd, 8);
 }
 
 void dct16_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
+    stride; // Unused by the C version: this no-op reference only keeps compilers from warning about an unused parameter, and the parameter stays so the signature matches the asm code. NOTE(review): src now takes the place of the old contiguous 16x16 copy - confirm callers pass a contiguous block; a bare expression statement may itself draw a "statement has no effect" warning on some compilers.
     const int shift_1st = 3 + X265_DEPTH - 8;
     const int shift_2nd = 10;
 
     ALIGN_VAR_32(int16_t, coef[16 * 16]);
-    ALIGN_VAR_32(int16_t, block[16 * 16]);
 
-    for (int i = 0; i < 16; i++)
-    {
-        memcpy(&block[i * 16], &src[i * stride], 16 * sizeof(int16_t));
-    }
-
-    partialButterfly16(block, coef, shift_1st, 16);
-    partialButterfly16(coef, block, shift_2nd, 16);
-
-#define N (16)
-    for (int i = 0; i < N; i++)
-    {
-        for (int j = 0; j < N; j++)
-        {
-            dst[i * N + j] = block[i * N + j];
-        }
-    }
-
-#undef N
+    partialButterfly16(src, coef, shift_1st, 16);
+    partialButterfly16(coef, dst, shift_2nd, 16);
 }
 
 void dct32_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
+    stride; // Unused by the C version: this no-op reference only keeps compilers from warning about an unused parameter, and the parameter stays so the signature matches the asm code. NOTE(review): src now takes the place of the old contiguous 32x32 copy - confirm callers pass a contiguous block; a bare expression statement may itself draw a "statement has no effect" warning on some compilers.
     const int shift_1st = 4 + X265_DEPTH - 8;
     const int shift_2nd = 11;
 
     ALIGN_VAR_32(int16_t, coef[32 * 32]);
-    ALIGN_VAR_32(int16_t, block[32 * 32]);
 
-    for (int i = 0; i < 32; i++)
-    {
-        memcpy(&block[i * 32], &src[i * stride], 32 * sizeof(int16_t));
-    }
-
-    partialButterfly32(block, coef, shift_1st, 32);
-    partialButterfly32(coef, block, shift_2nd, 32);
-
-#define N (32)
-    for (int i = 0; i < N; i++)
-    {
-        for (int j = 0; j < N; j++)
-        {
-            dst[i * N + j] = block[i * N + j];
-        }
-    }
-
-#undef N
+    partialButterfly32(src, coef, shift_1st, 32);
+    partialButterfly32(coef, dst, shift_2nd, 32);
 }
 
 void idst4_c(int16_t *src, int16_t *dst, intptr_t stride)


More information about the x265-devel mailing list