[x265] [PATCH 1/9] Move C DCT implementations into X265_NS

Thu Aug 22 15:17:50 UTC 2024

Move the C implementations of the forward and inverse DCT/DST functions
(dst4_c, dct{4,8,16,32}_c, idst4_c, idct{4,8,16,32}_c) into the X265_NS
namespace, and remove the static modifier from their definitions, so
that they can be referenced from external code when linking to libx265.
---
 source/common/dct.cpp | 340 +++++++++++++++++++++---------------------
 1 file changed, 170 insertions(+), 170 deletions(-)

diff --git a/source/common/dct.cpp b/source/common/dct.cpp
index b102b6e31..d318b2c64 100644
--- a/source/common/dct.cpp
+++ b/source/common/dct.cpp
@@ -439,176 +439,6 @@ static void partialButterfly4(const int16_t* src, int16_t* dst, int shift, int l
     }
 }
 
-static void dst4_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
-{
-    const int shift_1st = 1 + X265_DEPTH - 8;
-    const int shift_2nd = 8;
-
-    ALIGN_VAR_32(int16_t, coef[4 * 4]);
-    ALIGN_VAR_32(int16_t, block[4 * 4]);
-
-    for (int i = 0; i < 4; i++)
-    {
-        memcpy(&block[i * 4], &src[i * srcStride], 4 * sizeof(int16_t));
-    }
-
-    fastForwardDst(block, coef, shift_1st);
-    fastForwardDst(coef, dst, shift_2nd);
-}
-
-static void dct4_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
-{
-    const int shift_1st = 1 + X265_DEPTH - 8;
-    const int shift_2nd = 8;
-
-    ALIGN_VAR_32(int16_t, coef[4 * 4]);
-    ALIGN_VAR_32(int16_t, block[4 * 4]);
-
-    for (int i = 0; i < 4; i++)
-    {
-        memcpy(&block[i * 4], &src[i * srcStride], 4 * sizeof(int16_t));
-    }
-
-    partialButterfly4(block, coef, shift_1st, 4);
-    partialButterfly4(coef, dst, shift_2nd, 4);
-}
-
-static void dct8_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
-{
-    const int shift_1st = 2 + X265_DEPTH - 8;
-    const int shift_2nd = 9;
-
-    ALIGN_VAR_32(int16_t, coef[8 * 8]);
-    ALIGN_VAR_32(int16_t, block[8 * 8]);
-
-    for (int i = 0; i < 8; i++)
-    {
-        memcpy(&block[i * 8], &src[i * srcStride], 8 * sizeof(int16_t));
-    }
-
-    partialButterfly8(block, coef, shift_1st, 8);
-    partialButterfly8(coef, dst, shift_2nd, 8);
-}
-
-static void dct16_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
-{
-    const int shift_1st = 3 + X265_DEPTH - 8;
-    const int shift_2nd = 10;
-
-    ALIGN_VAR_32(int16_t, coef[16 * 16]);
-    ALIGN_VAR_32(int16_t, block[16 * 16]);
-
-    for (int i = 0; i < 16; i++)
-    {
-        memcpy(&block[i * 16], &src[i * srcStride], 16 * sizeof(int16_t));
-    }
-
-    partialButterfly16(block, coef, shift_1st, 16);
-    partialButterfly16(coef, dst, shift_2nd, 16);
-}
-
-static void dct32_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
-{
-    const int shift_1st = 4 + X265_DEPTH - 8;
-    const int shift_2nd = 11;
-
-    ALIGN_VAR_32(int16_t, coef[32 * 32]);
-    ALIGN_VAR_32(int16_t, block[32 * 32]);
-
-    for (int i = 0; i < 32; i++)
-    {
-        memcpy(&block[i * 32], &src[i * srcStride], 32 * sizeof(int16_t));
-    }
-
-    partialButterfly32(block, coef, shift_1st, 32);
-    partialButterfly32(coef, dst, shift_2nd, 32);
-}
-
-static void idst4_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
-{
-    const int shift_1st = 7;
-    const int shift_2nd = 12 - (X265_DEPTH - 8);
-
-    ALIGN_VAR_32(int16_t, coef[4 * 4]);
-    ALIGN_VAR_32(int16_t, block[4 * 4]);
-
-    inversedst(src, coef, shift_1st); // Forward DST BY FAST ALGORITHM, block input, coef output
-    inversedst(coef, block, shift_2nd); // Forward DST BY FAST ALGORITHM, coef input, coeff output
-
-    for (int i = 0; i < 4; i++)
-    {
-        memcpy(&dst[i * dstStride], &block[i * 4], 4 * sizeof(int16_t));
-    }
-}
-
-static void idct4_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
-{
-    const int shift_1st = 7;
-    const int shift_2nd = 12 - (X265_DEPTH - 8);
-
-    ALIGN_VAR_32(int16_t, coef[4 * 4]);
-    ALIGN_VAR_32(int16_t, block[4 * 4]);
-
-    partialButterflyInverse4(src, coef, shift_1st, 4); // Forward DST BY FAST ALGORITHM, block input, coef output
-    partialButterflyInverse4(coef, block, shift_2nd, 4); // Forward DST BY FAST ALGORITHM, coef input, coeff output
-
-    for (int i = 0; i < 4; i++)
-    {
-        memcpy(&dst[i * dstStride], &block[i * 4], 4 * sizeof(int16_t));
-    }
-}
-
-static void idct8_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
-{
-    const int shift_1st = 7;
-    const int shift_2nd = 12 - (X265_DEPTH - 8);
-
-    ALIGN_VAR_32(int16_t, coef[8 * 8]);
-    ALIGN_VAR_32(int16_t, block[8 * 8]);
-
-    partialButterflyInverse8(src, coef, shift_1st, 8);
-    partialButterflyInverse8(coef, block, shift_2nd, 8);
-
-    for (int i = 0; i < 8; i++)
-    {
-        memcpy(&dst[i * dstStride], &block[i * 8], 8 * sizeof(int16_t));
-    }
-}
-
-static void idct16_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
-{
-    const int shift_1st = 7;
-    const int shift_2nd = 12 - (X265_DEPTH - 8);
-
-    ALIGN_VAR_32(int16_t, coef[16 * 16]);
-    ALIGN_VAR_32(int16_t, block[16 * 16]);
-
-    partialButterflyInverse16(src, coef, shift_1st, 16);
-    partialButterflyInverse16(coef, block, shift_2nd, 16);
-
-    for (int i = 0; i < 16; i++)
-    {
-        memcpy(&dst[i * dstStride], &block[i * 16], 16 * sizeof(int16_t));
-    }
-}
-
-static void idct32_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
-{
-    const int shift_1st = 7;
-    const int shift_2nd = 12 - (X265_DEPTH - 8);
-
-    ALIGN_VAR_32(int16_t, coef[32 * 32]);
-    ALIGN_VAR_32(int16_t, block[32 * 32]);
-
-    partialButterflyInverse32(src, coef, shift_1st, 32);
-    partialButterflyInverse32(coef, block, shift_2nd, 32);
-
-    for (int i = 0; i < 32; i++)
-    {
-        memcpy(&dst[i * dstStride], &block[i * 32], 32 * sizeof(int16_t));
-    }
-}
-
 static void dequant_normal_c(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift)
 {
 #if HIGH_BIT_DEPTH
@@ -1070,6 +900,176 @@ static void psyRdoQuant_c_2(int16_t *m_resiDctCoeff, int16_t *m_fencDctCoeff, in
 
 namespace X265_NS {
 // x265 private namespace
+void dst4_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 4x4 block: two fastForwardDst passes
+{
+    const int shift_1st = 1 + X265_DEPTH - 8; // first-pass shift, grows with X265_DEPTH
+    const int shift_2nd = 8; // second-pass shift, fixed
+
+    ALIGN_VAR_32(int16_t, coef[4 * 4]); // output of the first pass
+    ALIGN_VAR_32(int16_t, block[4 * 4]); // contiguous copy of the strided input
+
+    for (int i = 0; i < 4; i++) // gather 4 rows of 4 values; source rows are srcStride elements apart
+    {
+        memcpy(&block[i * 4], &src[i * srcStride], 4 * sizeof(int16_t));
+    }
+
+    fastForwardDst(block, coef, shift_1st); // pass 1: block -> coef
+    fastForwardDst(coef, dst, shift_2nd); // pass 2: coef -> dst (no stride is applied to dst)
+}
+
+void dct4_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 4x4 block: two partialButterfly4 passes
+{
+    const int shift_1st = 1 + X265_DEPTH - 8; // first-pass shift, grows with X265_DEPTH
+    const int shift_2nd = 8; // second-pass shift, fixed
+
+    ALIGN_VAR_32(int16_t, coef[4 * 4]); // output of the first pass
+    ALIGN_VAR_32(int16_t, block[4 * 4]); // contiguous copy of the strided input
+
+    for (int i = 0; i < 4; i++) // gather 4 rows of 4 values; source rows are srcStride elements apart
+    {
+        memcpy(&block[i * 4], &src[i * srcStride], 4 * sizeof(int16_t));
+    }
+
+    partialButterfly4(block, coef, shift_1st, 4); // pass 1: block -> coef
+    partialButterfly4(coef, dst, shift_2nd, 4); // pass 2: coef -> dst (no stride is applied to dst)
+}
+
+void dct8_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 8x8 block: two partialButterfly8 passes
+{
+    const int shift_1st = 2 + X265_DEPTH - 8; // first-pass shift, grows with X265_DEPTH
+    const int shift_2nd = 9; // second-pass shift, fixed
+
+    ALIGN_VAR_32(int16_t, coef[8 * 8]); // output of the first pass
+    ALIGN_VAR_32(int16_t, block[8 * 8]); // contiguous copy of the strided input
+
+    for (int i = 0; i < 8; i++) // gather 8 rows of 8 values; source rows are srcStride elements apart
+    {
+        memcpy(&block[i * 8], &src[i * srcStride], 8 * sizeof(int16_t));
+    }
+
+    partialButterfly8(block, coef, shift_1st, 8); // pass 1: block -> coef
+    partialButterfly8(coef, dst, shift_2nd, 8); // pass 2: coef -> dst (no stride is applied to dst)
+}
+
+void dct16_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 16x16 block: two partialButterfly16 passes
+{
+    const int shift_1st = 3 + X265_DEPTH - 8; // first-pass shift, grows with X265_DEPTH
+    const int shift_2nd = 10; // second-pass shift, fixed
+
+    ALIGN_VAR_32(int16_t, coef[16 * 16]); // output of the first pass
+    ALIGN_VAR_32(int16_t, block[16 * 16]); // contiguous copy of the strided input
+
+    for (int i = 0; i < 16; i++) // gather 16 rows of 16 values; source rows are srcStride elements apart
+    {
+        memcpy(&block[i * 16], &src[i * srcStride], 16 * sizeof(int16_t));
+    }
+
+    partialButterfly16(block, coef, shift_1st, 16); // pass 1: block -> coef
+    partialButterfly16(coef, dst, shift_2nd, 16); // pass 2: coef -> dst (no stride is applied to dst)
+}
+
+void dct32_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 32x32 block: two partialButterfly32 passes
+{
+    const int shift_1st = 4 + X265_DEPTH - 8; // first-pass shift, grows with X265_DEPTH
+    const int shift_2nd = 11; // second-pass shift, fixed
+
+    ALIGN_VAR_32(int16_t, coef[32 * 32]); // output of the first pass
+    ALIGN_VAR_32(int16_t, block[32 * 32]); // contiguous copy of the strided input
+
+    for (int i = 0; i < 32; i++) // gather 32 rows of 32 values; source rows are srcStride elements apart
+    {
+        memcpy(&block[i * 32], &src[i * srcStride], 32 * sizeof(int16_t));
+    }
+
+    partialButterfly32(block, coef, shift_1st, 32); // pass 1: block -> coef
+    partialButterfly32(coef, dst, shift_2nd, 32); // pass 2: coef -> dst (no stride is applied to dst)
+}
+
+void idst4_c(const int16_t* src, int16_t* dst, intptr_t dstStride) // 4x4 block: two inversedst passes
+{
+    const int shift_1st = 7; // first-pass shift, fixed
+    const int shift_2nd = 12 - (X265_DEPTH - 8); // second-pass shift, shrinks as X265_DEPTH grows
+
+    ALIGN_VAR_32(int16_t, coef[4 * 4]); // output of the first pass
+    ALIGN_VAR_32(int16_t, block[4 * 4]); // contiguous result before the strided copy-out
+
+    inversedst(src, coef, shift_1st); // inverse DST pass 1: src input (read without a stride), coef output
+    inversedst(coef, block, shift_2nd); // inverse DST pass 2: coef input, block output
+
+    for (int i = 0; i < 4; i++) // scatter 4 rows of 4 values; destination rows are dstStride elements apart
+    {
+        memcpy(&dst[i * dstStride], &block[i * 4], 4 * sizeof(int16_t));
+    }
+}
+
+void idct4_c(const int16_t* src, int16_t* dst, intptr_t dstStride) // 4x4 block: two partialButterflyInverse4 passes
+{
+    const int shift_1st = 7; // first-pass shift, fixed
+    const int shift_2nd = 12 - (X265_DEPTH - 8); // second-pass shift, shrinks as X265_DEPTH grows
+
+    ALIGN_VAR_32(int16_t, coef[4 * 4]); // output of the first pass
+    ALIGN_VAR_32(int16_t, block[4 * 4]); // contiguous result before the strided copy-out
+
+    partialButterflyInverse4(src, coef, shift_1st, 4); // inverse butterfly pass 1: src input (read without a stride), coef output
+    partialButterflyInverse4(coef, block, shift_2nd, 4); // inverse butterfly pass 2: coef input, block output
+
+    for (int i = 0; i < 4; i++) // scatter 4 rows of 4 values; destination rows are dstStride elements apart
+    {
+        memcpy(&dst[i * dstStride], &block[i * 4], 4 * sizeof(int16_t));
+    }
+}
+
+void idct8_c(const int16_t* src, int16_t* dst, intptr_t dstStride) // 8x8 block: two partialButterflyInverse8 passes
+{
+    const int shift_1st = 7; // first-pass shift, fixed
+    const int shift_2nd = 12 - (X265_DEPTH - 8); // second-pass shift, shrinks as X265_DEPTH grows
+
+    ALIGN_VAR_32(int16_t, coef[8 * 8]); // output of the first pass
+    ALIGN_VAR_32(int16_t, block[8 * 8]); // contiguous result before the strided copy-out
+
+    partialButterflyInverse8(src, coef, shift_1st, 8); // pass 1: src -> coef (src is read without a stride)
+    partialButterflyInverse8(coef, block, shift_2nd, 8); // pass 2: coef -> block
+
+    for (int i = 0; i < 8; i++) // scatter 8 rows of 8 values; destination rows are dstStride elements apart
+    {
+        memcpy(&dst[i * dstStride], &block[i * 8], 8 * sizeof(int16_t));
+    }
+}
+
+void idct16_c(const int16_t* src, int16_t* dst, intptr_t dstStride) // 16x16 block: two partialButterflyInverse16 passes
+{
+    const int shift_1st = 7; // first-pass shift, fixed
+    const int shift_2nd = 12 - (X265_DEPTH - 8); // second-pass shift, shrinks as X265_DEPTH grows
+
+    ALIGN_VAR_32(int16_t, coef[16 * 16]); // output of the first pass
+    ALIGN_VAR_32(int16_t, block[16 * 16]); // contiguous result before the strided copy-out
+
+    partialButterflyInverse16(src, coef, shift_1st, 16); // pass 1: src -> coef (src is read without a stride)
+    partialButterflyInverse16(coef, block, shift_2nd, 16); // pass 2: coef -> block
+
+    for (int i = 0; i < 16; i++) // scatter 16 rows of 16 values; destination rows are dstStride elements apart
+    {
+        memcpy(&dst[i * dstStride], &block[i * 16], 16 * sizeof(int16_t));
+    }
+}
+
+void idct32_c(const int16_t* src, int16_t* dst, intptr_t dstStride) // 32x32 block: two partialButterflyInverse32 passes
+{
+    const int shift_1st = 7; // first-pass shift, fixed
+    const int shift_2nd = 12 - (X265_DEPTH - 8); // second-pass shift, shrinks as X265_DEPTH grows
+
+    ALIGN_VAR_32(int16_t, coef[32 * 32]); // output of the first pass
+    ALIGN_VAR_32(int16_t, block[32 * 32]); // contiguous result before the strided copy-out
+
+    partialButterflyInverse32(src, coef, shift_1st, 32); // pass 1: src -> coef (src is read without a stride)
+    partialButterflyInverse32(coef, block, shift_2nd, 32); // pass 2: coef -> block
+
+    for (int i = 0; i < 32; i++) // scatter 32 rows of 32 values; destination rows are dstStride elements apart
+    {
+        memcpy(&dst[i * dstStride], &block[i * 32], 32 * sizeof(int16_t));
+    }
+}
+
 void setupDCTPrimitives_c(EncoderPrimitives& p)
 {
     p.dequant_scaling = dequant_scaling_c;
-- 
2.42.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Move-C-DCT-implementations-into-X265_NS.patch
Type: text/x-patch
Size: 11363 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240822/9e461e2f/attachment.bin>