[x265] [PATCH 1/9] Move C DCT implementations into X265_NS
Hari Limaye
hari.limaye at arm.com
Thu Aug 22 15:17:50 UTC 2024
Move C implementations of DCT functions into the X265_NS namespace, and
remove the static modifier from their declarations, so that they can be
referenced from external code when linking to libx265.
---
source/common/dct.cpp | 340 +++++++++++++++++++++---------------------
1 file changed, 170 insertions(+), 170 deletions(-)
diff --git a/source/common/dct.cpp b/source/common/dct.cpp
index b102b6e31..d318b2c64 100644
--- a/source/common/dct.cpp
+++ b/source/common/dct.cpp
@@ -439,176 +439,6 @@ static void partialButterfly4(const int16_t* src, int16_t* dst, int shift, int l
}
}
-static void dst4_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
-{
- const int shift_1st = 1 + X265_DEPTH - 8;
- const int shift_2nd = 8;
-
- ALIGN_VAR_32(int16_t, coef[4 * 4]);
- ALIGN_VAR_32(int16_t, block[4 * 4]);
-
- for (int i = 0; i < 4; i++)
- {
- memcpy(&block[i * 4], &src[i * srcStride], 4 * sizeof(int16_t));
- }
-
- fastForwardDst(block, coef, shift_1st);
- fastForwardDst(coef, dst, shift_2nd);
-}
-
-static void dct4_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
-{
- const int shift_1st = 1 + X265_DEPTH - 8;
- const int shift_2nd = 8;
-
- ALIGN_VAR_32(int16_t, coef[4 * 4]);
- ALIGN_VAR_32(int16_t, block[4 * 4]);
-
- for (int i = 0; i < 4; i++)
- {
- memcpy(&block[i * 4], &src[i * srcStride], 4 * sizeof(int16_t));
- }
-
- partialButterfly4(block, coef, shift_1st, 4);
- partialButterfly4(coef, dst, shift_2nd, 4);
-}
-
-static void dct8_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
-{
- const int shift_1st = 2 + X265_DEPTH - 8;
- const int shift_2nd = 9;
-
- ALIGN_VAR_32(int16_t, coef[8 * 8]);
- ALIGN_VAR_32(int16_t, block[8 * 8]);
-
- for (int i = 0; i < 8; i++)
- {
- memcpy(&block[i * 8], &src[i * srcStride], 8 * sizeof(int16_t));
- }
-
- partialButterfly8(block, coef, shift_1st, 8);
- partialButterfly8(coef, dst, shift_2nd, 8);
-}
-
-static void dct16_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
-{
- const int shift_1st = 3 + X265_DEPTH - 8;
- const int shift_2nd = 10;
-
- ALIGN_VAR_32(int16_t, coef[16 * 16]);
- ALIGN_VAR_32(int16_t, block[16 * 16]);
-
- for (int i = 0; i < 16; i++)
- {
- memcpy(&block[i * 16], &src[i * srcStride], 16 * sizeof(int16_t));
- }
-
- partialButterfly16(block, coef, shift_1st, 16);
- partialButterfly16(coef, dst, shift_2nd, 16);
-}
-
-static void dct32_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
-{
- const int shift_1st = 4 + X265_DEPTH - 8;
- const int shift_2nd = 11;
-
- ALIGN_VAR_32(int16_t, coef[32 * 32]);
- ALIGN_VAR_32(int16_t, block[32 * 32]);
-
- for (int i = 0; i < 32; i++)
- {
- memcpy(&block[i * 32], &src[i * srcStride], 32 * sizeof(int16_t));
- }
-
- partialButterfly32(block, coef, shift_1st, 32);
- partialButterfly32(coef, dst, shift_2nd, 32);
-}
-
-static void idst4_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
-{
- const int shift_1st = 7;
- const int shift_2nd = 12 - (X265_DEPTH - 8);
-
- ALIGN_VAR_32(int16_t, coef[4 * 4]);
- ALIGN_VAR_32(int16_t, block[4 * 4]);
-
- inversedst(src, coef, shift_1st); // Forward DST BY FAST ALGORITHM, block input, coef output
- inversedst(coef, block, shift_2nd); // Forward DST BY FAST ALGORITHM, coef input, coeff output
-
- for (int i = 0; i < 4; i++)
- {
- memcpy(&dst[i * dstStride], &block[i * 4], 4 * sizeof(int16_t));
- }
-}
-
-static void idct4_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
-{
- const int shift_1st = 7;
- const int shift_2nd = 12 - (X265_DEPTH - 8);
-
- ALIGN_VAR_32(int16_t, coef[4 * 4]);
- ALIGN_VAR_32(int16_t, block[4 * 4]);
-
- partialButterflyInverse4(src, coef, shift_1st, 4); // Forward DST BY FAST ALGORITHM, block input, coef output
- partialButterflyInverse4(coef, block, shift_2nd, 4); // Forward DST BY FAST ALGORITHM, coef input, coeff output
-
- for (int i = 0; i < 4; i++)
- {
- memcpy(&dst[i * dstStride], &block[i * 4], 4 * sizeof(int16_t));
- }
-}
-
-static void idct8_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
-{
- const int shift_1st = 7;
- const int shift_2nd = 12 - (X265_DEPTH - 8);
-
- ALIGN_VAR_32(int16_t, coef[8 * 8]);
- ALIGN_VAR_32(int16_t, block[8 * 8]);
-
- partialButterflyInverse8(src, coef, shift_1st, 8);
- partialButterflyInverse8(coef, block, shift_2nd, 8);
-
- for (int i = 0; i < 8; i++)
- {
- memcpy(&dst[i * dstStride], &block[i * 8], 8 * sizeof(int16_t));
- }
-}
-
-static void idct16_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
-{
- const int shift_1st = 7;
- const int shift_2nd = 12 - (X265_DEPTH - 8);
-
- ALIGN_VAR_32(int16_t, coef[16 * 16]);
- ALIGN_VAR_32(int16_t, block[16 * 16]);
-
- partialButterflyInverse16(src, coef, shift_1st, 16);
- partialButterflyInverse16(coef, block, shift_2nd, 16);
-
- for (int i = 0; i < 16; i++)
- {
- memcpy(&dst[i * dstStride], &block[i * 16], 16 * sizeof(int16_t));
- }
-}
-
-static void idct32_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
-{
- const int shift_1st = 7;
- const int shift_2nd = 12 - (X265_DEPTH - 8);
-
- ALIGN_VAR_32(int16_t, coef[32 * 32]);
- ALIGN_VAR_32(int16_t, block[32 * 32]);
-
- partialButterflyInverse32(src, coef, shift_1st, 32);
- partialButterflyInverse32(coef, block, shift_2nd, 32);
-
- for (int i = 0; i < 32; i++)
- {
- memcpy(&dst[i * dstStride], &block[i * 32], 32 * sizeof(int16_t));
- }
-}
-
static void dequant_normal_c(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift)
{
#if HIGH_BIT_DEPTH
@@ -1070,6 +900,176 @@ static void psyRdoQuant_c_2(int16_t *m_resiDctCoeff, int16_t *m_fencDctCoeff, in
namespace X265_NS {
// x265 private namespace
+void dst4_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 4x4: gather strided src rows, then run two fastForwardDst passes into dst
+{
+ const int shift_1st = 1 + X265_DEPTH - 8; // shift handed to the first pass; grows with X265_DEPTH
+ const int shift_2nd = 8; // shift handed to the second pass
+
+ ALIGN_VAR_32(int16_t, coef[4 * 4]); // intermediate result between the two passes
+ ALIGN_VAR_32(int16_t, block[4 * 4]); // contiguous copy of the 4x4 input
+
+ for (int i = 0; i < 4; i++)
+ {
+ memcpy(&block[i * 4], &src[i * srcStride], 4 * sizeof(int16_t)); // copy row i; srcStride counts int16_t elements
+ }
+
+ fastForwardDst(block, coef, shift_1st); // first pass: block -> coef
+ fastForwardDst(coef, dst, shift_2nd); // second pass: coef -> dst (written contiguously, no stride)
+}
+
+void dct4_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 4x4: gather strided src rows, then run two partialButterfly4 passes into dst
+{
+ const int shift_1st = 1 + X265_DEPTH - 8; // shift handed to the first pass; grows with X265_DEPTH
+ const int shift_2nd = 8; // shift handed to the second pass
+
+ ALIGN_VAR_32(int16_t, coef[4 * 4]); // intermediate result between the two passes
+ ALIGN_VAR_32(int16_t, block[4 * 4]); // contiguous copy of the 4x4 input
+
+ for (int i = 0; i < 4; i++)
+ {
+ memcpy(&block[i * 4], &src[i * srcStride], 4 * sizeof(int16_t)); // copy row i; srcStride counts int16_t elements
+ }
+
+ partialButterfly4(block, coef, shift_1st, 4); // first pass: block -> coef
+ partialButterfly4(coef, dst, shift_2nd, 4); // second pass: coef -> dst (written contiguously, no stride)
+}
+
+void dct8_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 8x8: gather strided src rows, then run two partialButterfly8 passes into dst
+{
+ const int shift_1st = 2 + X265_DEPTH - 8; // shift handed to the first pass; grows with X265_DEPTH
+ const int shift_2nd = 9; // shift handed to the second pass
+
+ ALIGN_VAR_32(int16_t, coef[8 * 8]); // intermediate result between the two passes
+ ALIGN_VAR_32(int16_t, block[8 * 8]); // contiguous copy of the 8x8 input
+
+ for (int i = 0; i < 8; i++)
+ {
+ memcpy(&block[i * 8], &src[i * srcStride], 8 * sizeof(int16_t)); // copy row i; srcStride counts int16_t elements
+ }
+
+ partialButterfly8(block, coef, shift_1st, 8); // first pass: block -> coef
+ partialButterfly8(coef, dst, shift_2nd, 8); // second pass: coef -> dst (written contiguously, no stride)
+}
+
+void dct16_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 16x16: gather strided src rows, then run two partialButterfly16 passes into dst
+{
+ const int shift_1st = 3 + X265_DEPTH - 8; // shift handed to the first pass; grows with X265_DEPTH
+ const int shift_2nd = 10; // shift handed to the second pass
+
+ ALIGN_VAR_32(int16_t, coef[16 * 16]); // intermediate result between the two passes
+ ALIGN_VAR_32(int16_t, block[16 * 16]); // contiguous copy of the 16x16 input
+
+ for (int i = 0; i < 16; i++)
+ {
+ memcpy(&block[i * 16], &src[i * srcStride], 16 * sizeof(int16_t)); // copy row i; srcStride counts int16_t elements
+ }
+
+ partialButterfly16(block, coef, shift_1st, 16); // first pass: block -> coef
+ partialButterfly16(coef, dst, shift_2nd, 16); // second pass: coef -> dst (written contiguously, no stride)
+}
+
+void dct32_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 32x32: gather strided src rows, then run two partialButterfly32 passes into dst
+{
+ const int shift_1st = 4 + X265_DEPTH - 8; // shift handed to the first pass; grows with X265_DEPTH
+ const int shift_2nd = 11; // shift handed to the second pass
+
+ ALIGN_VAR_32(int16_t, coef[32 * 32]); // intermediate result between the two passes
+ ALIGN_VAR_32(int16_t, block[32 * 32]); // contiguous copy of the 32x32 input
+
+ for (int i = 0; i < 32; i++)
+ {
+ memcpy(&block[i * 32], &src[i * srcStride], 32 * sizeof(int16_t)); // copy row i; srcStride counts int16_t elements
+ }
+
+ partialButterfly32(block, coef, shift_1st, 32); // first pass: block -> coef
+ partialButterfly32(coef, dst, shift_2nd, 32); // second pass: coef -> dst (written contiguously, no stride)
+}
+
+void idst4_c(const int16_t* src, int16_t* dst, intptr_t dstStride) // 4x4: run two inversedst passes on src, then scatter the rows into strided dst
+{
+ const int shift_1st = 7; // shift handed to the first pass
+ const int shift_2nd = 12 - (X265_DEPTH - 8); // shift handed to the second pass; shrinks as X265_DEPTH grows
+
+ ALIGN_VAR_32(int16_t, coef[4 * 4]); // intermediate result between the two passes
+ ALIGN_VAR_32(int16_t, block[4 * 4]); // contiguous 4x4 result before it is copied out
+
+ inversedst(src, coef, shift_1st); // first inverse pass: src input (read contiguously), coef output
+ inversedst(coef, block, shift_2nd); // second inverse pass: coef input, block output
+
+ for (int i = 0; i < 4; i++)
+ {
+ memcpy(&dst[i * dstStride], &block[i * 4], 4 * sizeof(int16_t)); // write row i; dstStride counts int16_t elements
+ }
+}
+
+void idct4_c(const int16_t* src, int16_t* dst, intptr_t dstStride) // 4x4: run two partialButterflyInverse4 passes on src, then scatter the rows into strided dst
+{
+ const int shift_1st = 7; // shift handed to the first pass
+ const int shift_2nd = 12 - (X265_DEPTH - 8); // shift handed to the second pass; shrinks as X265_DEPTH grows
+
+ ALIGN_VAR_32(int16_t, coef[4 * 4]); // intermediate result between the two passes
+ ALIGN_VAR_32(int16_t, block[4 * 4]); // contiguous 4x4 result before it is copied out
+
+ partialButterflyInverse4(src, coef, shift_1st, 4); // first inverse butterfly pass: src input (read contiguously), coef output
+ partialButterflyInverse4(coef, block, shift_2nd, 4); // second inverse butterfly pass: coef input, block output
+
+ for (int i = 0; i < 4; i++)
+ {
+ memcpy(&dst[i * dstStride], &block[i * 4], 4 * sizeof(int16_t)); // write row i; dstStride counts int16_t elements
+ }
+}
+
+void idct8_c(const int16_t* src, int16_t* dst, intptr_t dstStride) // 8x8: run two partialButterflyInverse8 passes on src, then scatter the rows into strided dst
+{
+ const int shift_1st = 7; // shift handed to the first pass
+ const int shift_2nd = 12 - (X265_DEPTH - 8); // shift handed to the second pass; shrinks as X265_DEPTH grows
+
+ ALIGN_VAR_32(int16_t, coef[8 * 8]); // intermediate result between the two passes
+ ALIGN_VAR_32(int16_t, block[8 * 8]); // contiguous 8x8 result before it is copied out
+
+ partialButterflyInverse8(src, coef, shift_1st, 8); // first pass: src (read contiguously) -> coef
+ partialButterflyInverse8(coef, block, shift_2nd, 8); // second pass: coef -> block
+
+ for (int i = 0; i < 8; i++)
+ {
+ memcpy(&dst[i * dstStride], &block[i * 8], 8 * sizeof(int16_t)); // write row i; dstStride counts int16_t elements
+ }
+}
+
+void idct16_c(const int16_t* src, int16_t* dst, intptr_t dstStride) // 16x16: run two partialButterflyInverse16 passes on src, then scatter the rows into strided dst
+{
+ const int shift_1st = 7; // shift handed to the first pass
+ const int shift_2nd = 12 - (X265_DEPTH - 8); // shift handed to the second pass; shrinks as X265_DEPTH grows
+
+ ALIGN_VAR_32(int16_t, coef[16 * 16]); // intermediate result between the two passes
+ ALIGN_VAR_32(int16_t, block[16 * 16]); // contiguous 16x16 result before it is copied out
+
+ partialButterflyInverse16(src, coef, shift_1st, 16); // first pass: src (read contiguously) -> coef
+ partialButterflyInverse16(coef, block, shift_2nd, 16); // second pass: coef -> block
+
+ for (int i = 0; i < 16; i++)
+ {
+ memcpy(&dst[i * dstStride], &block[i * 16], 16 * sizeof(int16_t)); // write row i; dstStride counts int16_t elements
+ }
+}
+
+void idct32_c(const int16_t* src, int16_t* dst, intptr_t dstStride) // 32x32: run two partialButterflyInverse32 passes on src, then scatter the rows into strided dst
+{
+ const int shift_1st = 7; // shift handed to the first pass
+ const int shift_2nd = 12 - (X265_DEPTH - 8); // shift handed to the second pass; shrinks as X265_DEPTH grows
+
+ ALIGN_VAR_32(int16_t, coef[32 * 32]); // intermediate result between the two passes
+ ALIGN_VAR_32(int16_t, block[32 * 32]); // contiguous 32x32 result before it is copied out
+
+ partialButterflyInverse32(src, coef, shift_1st, 32); // first pass: src (read contiguously) -> coef
+ partialButterflyInverse32(coef, block, shift_2nd, 32); // second pass: coef -> block
+
+ for (int i = 0; i < 32; i++)
+ {
+ memcpy(&dst[i * dstStride], &block[i * 32], 32 * sizeof(int16_t)); // write row i; dstStride counts int16_t elements
+ }
+}
+
void setupDCTPrimitives_c(EncoderPrimitives& p)
{
p.dequant_scaling = dequant_scaling_c;
--
2.42.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Move-C-DCT-implementations-into-X265_NS.patch
Type: text/x-patch
Size: 11363 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240822/9e461e2f/attachment.bin>
More information about the x265-devel
mailing list