[x264-devel] [PATCH 1/2] ppc: Add add8x8_idct_dc
Alexandra Hájková
alexandra.khirnova at gmail.com
Mon Nov 14 15:06:05 CET 2016
From: Alexandra Hajkova <alexandra at khirnov.net>
---
common/dct.c | 2 ++
common/ppc/dct.c | 29 +++++++++++++++++++++++++++++
common/ppc/dct.h | 2 ++
3 files changed, 33 insertions(+)
diff --git a/common/dct.c b/common/dct.c
index 7dfeea2..d59c2db 100644
--- a/common/dct.c
+++ b/common/dct.c
@@ -720,6 +720,8 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
dctf->sub8x8_dct = x264_sub8x8_dct_altivec;
dctf->sub16x16_dct = x264_sub16x16_dct_altivec;
+ dctf->add8x8_idct_dc = x264_add8x8_idct_dc_altivec;
+
dctf->add4x4_idct = x264_add4x4_idct_altivec;
dctf->add8x8_idct = x264_add8x8_idct_altivec;
dctf->add16x16_idct = x264_add16x16_idct_altivec;
diff --git a/common/ppc/dct.c b/common/ppc/dct.c
index 6dc0447..3c04c77 100644
--- a/common/ppc/dct.c
+++ b/common/ppc/dct.c
@@ -229,6 +229,35 @@ void x264_sub16x16_dct8_altivec( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix
* IDCT transform:
****************************************************************************/
+#define ALTIVEC_STORE8_DC_SUM_CLIP(dest, dcv) \
+{ \
+ /* unaligned load of one full vec_u8_t starting at dest (presumably 16 bytes -- confirm against vec_u8_t) */ \
+ vec_u8_t dstv = vec_vsx_ld( 0, dest ); \
+ vec_s16_t dcvsum = vec_adds( dcv, vec_u8_to_s16_h(dstv) ); /* saturating add of dcv to the "h" half of dstv widened to s16 */ \
+ vec_u8_t dcvsum8 = vec_packsu( dcvsum, vec_u8_to_s16_l(dstv) ); /* pack back to u8 with unsigned saturation; the "l" half comes straight from dstv */ \
+ /* unaligned store of the whole vector back to dest; the half that was not summed is rewritten with the values just loaded */ \
+ vec_vsx_st( dcvsum8, 0, dest ); \
+}
+
+static void idct8_dc_altivec( uint8_t *dst, int16_t dc1, int16_t dc2 )
+{ /* Adds two rounded DC terms to four rows of dst spaced FDEC_STRIDE bytes apart. */
+ dc1 = ( dc1 + 32 ) >> 6; /* add 32, then shift right by 6: rounded scaling by 1/64 */
+ dc2 = ( dc2 + 32 ) >> 6; /* same rounding for the second DC term */
+ vec_s16_t dcv = { dc1, dc1, dc1, dc1, dc2, dc2, dc2, dc2 }; /* dc1 in the first four lanes, dc2 in the last four */
+
+ LOAD_ZERO; /* defined elsewhere; presumably declares the zero vector needed by vec_u8_to_s16_h/_l -- TODO confirm */
+ ALTIVEC_STORE8_DC_SUM_CLIP( &dst[0*FDEC_STRIDE], dcv); /* row 0 */
+ ALTIVEC_STORE8_DC_SUM_CLIP( &dst[1*FDEC_STRIDE], dcv); /* row 1 */
+ ALTIVEC_STORE8_DC_SUM_CLIP( &dst[2*FDEC_STRIDE], dcv); /* row 2 */
+ ALTIVEC_STORE8_DC_SUM_CLIP( &dst[3*FDEC_STRIDE], dcv); /* row 3 */
+}
+
+void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] )
+{ /* Applies the four DC terms in dct[] to p_dst, two terms per group of four rows. */
+ idct8_dc_altivec( &p_dst[0], dct[0], dct[1] ); /* rows 0-3 use dct[0] and dct[1] */
+ idct8_dc_altivec( &p_dst[4*FDEC_STRIDE+0], dct[2], dct[3] ); /* rows 4-7 use dct[2] and dct[3] */
+}
+
#define IDCT_1D_ALTIVEC(s0, s1, s2, s3, d0, d1, d2, d3) \
{ \
/* a0 = SRC(0) + SRC(2); */ \
diff --git a/common/ppc/dct.h b/common/ppc/dct.h
index 332f3cc..4011b8f 100644
--- a/common/ppc/dct.h
+++ b/common/ppc/dct.h
@@ -31,6 +31,8 @@ void x264_sub4x4_dct_altivec( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 );
void x264_sub8x8_dct_altivec( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 );
void x264_sub16x16_dct_altivec( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
+void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] );
+
void x264_add4x4_idct_altivec( uint8_t *p_dst, int16_t dct[16] );
void x264_add8x8_idct_altivec( uint8_t *p_dst, int16_t dct[4][16] );
void x264_add16x16_idct_altivec( uint8_t *p_dst, int16_t dct[16][16] );
--
2.7.4
More information about the x264-devel
mailing list