[x264-devel] commit: update altivec zigzags (Manuel)
git version control
git at videolan.org
Thu Mar 20 21:21:46 CET 2008
x264 | branch: master | Manuel <maaanuuu at gmx.net> | Thu Mar 20 13:21:16 2008 -0600| [e1d815e15cc62b52ed67b4fd1538aaa238c70e97]
update altivec zigzags
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=e1d815e15cc62b52ed67b4fd1538aaa238c70e97
---
common/ppc/dct.c | 56 +++++++++++------------------------------------------
common/ppc/dct.h | 8 +++---
2 files changed, 16 insertions(+), 48 deletions(-)
diff --git a/common/ppc/dct.c b/common/ppc/dct.c
index 2be9893..024a157 100644
--- a/common/ppc/dct.c
+++ b/common/ppc/dct.c
@@ -456,11 +456,10 @@ void x264_add16x16_idct8_altivec( uint8_t *dst, int16_t dct[4][8][8] )
x264_add8x8_idct8_altivec( &dst[8*FDEC_STRIDE+8], dct[3] );
}
-void x264_zigzag_scan_4x4_frame_altivec( int level[16], int16_t dct[4][4] )
+void x264_zigzag_scan_4x4_frame_altivec( int16_t level[16], int16_t dct[4][4] )
{
vec_s16_t dct0v, dct1v;
vec_s16_t tmp0v, tmp1v;
- vec_s32_t level0v, level1v, level2v, level3v;
dct0v = vec_ld(0x00, (int16_t*)dct);
dct1v = vec_ld(0x10, (int16_t*)dct);
@@ -471,22 +470,14 @@ void x264_zigzag_scan_4x4_frame_altivec( int level[16], int16_t dct[4][4] )
tmp0v = vec_perm( dct0v, dct1v, sel0 );
tmp1v = vec_perm( dct0v, dct1v, sel1 );
- level0v = vec_unpackh( tmp0v );
- level1v = vec_unpackl( tmp0v );
- level2v = vec_unpackh( tmp1v );
- level3v = vec_unpackl( tmp1v );
-
- vec_st( level0v, 0x00, level );
- vec_st( level1v, 0x10, level );
- vec_st( level2v, 0x20, level );
- vec_st( level3v, 0x30, level );
+ vec_st( tmp0v, 0x00, level );
+ vec_st( tmp1v, 0x10, level );
}
-void x264_zigzag_scan_4x4_field_altivec( int level[16], int16_t dct[4][4] )
+void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] )
{
vec_s16_t dct0v, dct1v;
vec_s16_t tmp0v, tmp1v;
- vec_s32_t level0v, level1v, level2v, level3v;
dct0v = vec_ld(0x00, (int16_t*)dct);
dct1v = vec_ld(0x10, (int16_t*)dct);
@@ -496,22 +487,14 @@ void x264_zigzag_scan_4x4_field_altivec( int level[16], int16_t dct[4][4] )
tmp0v = vec_perm( dct0v, dct1v, sel0 );
tmp1v = dct1v;
- level0v = vec_unpackh( tmp0v );
- level1v = vec_unpackl( tmp0v );
- level2v = vec_unpackh( tmp1v );
- level3v = vec_unpackl( tmp1v );
-
- vec_st( level0v, 0x00, level );
- vec_st( level1v, 0x10, level );
- vec_st( level2v, 0x20, level );
- vec_st( level3v, 0x30, level );
+ vec_st( tmp0v, 0x00, level );
+ vec_st( tmp1v, 0x10, level );
}
-void x264_zigzag_scan_4x4ac_frame_altivec( int level[15], int16_t dct[4][4] )
+void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] )
{
vec_s16_t dct0v, dct1v;
vec_s16_t tmp0v, tmp1v;
- vec_s32_t level0v, level1v, level2v, level3v;
dct0v = vec_ld(0x00, (int16_t*)dct);
dct1v = vec_ld(0x10, (int16_t*)dct);
@@ -522,22 +505,14 @@ void x264_zigzag_scan_4x4ac_frame_altivec( int level[15], int16_t dct[4][4] )
tmp0v = vec_perm( dct0v, dct1v, sel0 );
tmp1v = vec_perm( dct0v, dct1v, sel1 );
- level0v = vec_unpackh( tmp0v );
- level1v = vec_unpackl( tmp0v );
- level2v = vec_unpackh( tmp1v );
- level3v = vec_unpackl( tmp1v );
-
- vec_st( level0v, 0x00, level );
- vec_st( level1v, 0x10, level );
- vec_st( level2v, 0x20, level );
- vec_st( level3v, 0x30, level ); // FIXME?: write level[15]
+ vec_st( tmp0v, 0x00, level );
+ vec_st( tmp1v, 0x10, level );
}
-void x264_zigzag_scan_4x4ac_field_altivec( int level[15], int16_t dct[4][4] )
+void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] )
{
vec_s16_t dct0v, dct1v;
vec_s16_t tmp0v, tmp1v;
- vec_s32_t level0v, level1v, level2v, level3v;
dct0v = vec_ld(0x00, (int16_t*)dct);
dct1v = vec_ld(0x10, (int16_t*)dct);
@@ -548,13 +523,6 @@ void x264_zigzag_scan_4x4ac_field_altivec( int level[15], int16_t dct[4][4] )
tmp0v = vec_perm( dct0v, dct1v, sel0 );
tmp1v = vec_perm( dct0v, dct1v, sel1 );
- level0v = vec_unpackh( tmp0v );
- level1v = vec_unpackl( tmp0v );
- level2v = vec_unpackh( tmp1v );
- level3v = vec_unpackl( tmp1v );
-
- vec_st( level0v, 0x00, level );
- vec_st( level1v, 0x10, level );
- vec_st( level2v, 0x20, level );
- vec_st( level3v, 0x30, level ); // FIXME?: write level[15]
+ vec_st( tmp0v, 0x00, level );
+ vec_st( tmp1v, 0x10, level );
}
diff --git a/common/ppc/dct.h b/common/ppc/dct.h
index 7bcde43..fa3023b 100644
--- a/common/ppc/dct.h
+++ b/common/ppc/dct.h
@@ -44,10 +44,10 @@ void x264_sub16x16_dct8_altivec( int16_t dct[4][8][8],
void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[8][8] );
void x264_add16x16_idct8_altivec( uint8_t *dst, int16_t dct[4][8][8] );
-void x264_zigzag_scan_4x4_frame_altivec( int level[16], int16_t dct[4][4] );
-void x264_zigzag_scan_4x4ac_frame_altivec( int level[15], int16_t dct[4][4] );
+void x264_zigzag_scan_4x4_frame_altivec( int16_t level[16], int16_t dct[4][4] );
+void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] );
-void x264_zigzag_scan_4x4_field_altivec( int level[16], int16_t dct[4][4] );
-void x264_zigzag_scan_4x4ac_field_altivec( int level[15], int16_t dct[4][4] );
+void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] );
+void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] );
#endif
More information about the x264-devel mailing list