[x264-devel] commit: update altivec zigzags (Manuel)

git version control git at videolan.org
Thu Mar 20 21:21:46 CET 2008


x264 | branch: master | Manuel <maaanuuu at gmx.net> | Thu Mar 20 13:21:16 2008 -0600| [e1d815e15cc62b52ed67b4fd1538aaa238c70e97]

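update altivec zigzags

The four AltiVec 4x4 zigzag scan routines now take an int16_t level[]
output instead of int, so the two permuted 16-bit vectors are stored
directly with two vec_st calls instead of being unpacked to 32-bit and
stored with four.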

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=e1d815e15cc62b52ed67b4fd1538aaa238c70e97
---

 common/ppc/dct.c |   56 +++++++++++------------------------------------------
 common/ppc/dct.h |    8 +++---
 2 files changed, 16 insertions(+), 48 deletions(-)

diff --git a/common/ppc/dct.c b/common/ppc/dct.c
index 2be9893..024a157 100644
--- a/common/ppc/dct.c
+++ b/common/ppc/dct.c
@@ -456,11 +456,10 @@ void x264_add16x16_idct8_altivec( uint8_t *dst, int16_t dct[4][8][8] )
     x264_add8x8_idct8_altivec( &dst[8*FDEC_STRIDE+8], dct[3] );
 }
 
-void x264_zigzag_scan_4x4_frame_altivec( int level[16], int16_t dct[4][4] )
+void x264_zigzag_scan_4x4_frame_altivec( int16_t level[16], int16_t dct[4][4] )
 {
     vec_s16_t dct0v, dct1v;
     vec_s16_t tmp0v, tmp1v;
-    vec_s32_t level0v, level1v, level2v, level3v;
 
     dct0v = vec_ld(0x00, (int16_t*)dct);
     dct1v = vec_ld(0x10, (int16_t*)dct);
@@ -471,22 +470,14 @@ void x264_zigzag_scan_4x4_frame_altivec( int level[16], int16_t dct[4][4] )
     tmp0v = vec_perm( dct0v, dct1v, sel0 );
     tmp1v = vec_perm( dct0v, dct1v, sel1 );
 
-    level0v = vec_unpackh( tmp0v );
-    level1v = vec_unpackl( tmp0v );
-    level2v = vec_unpackh( tmp1v );
-    level3v = vec_unpackl( tmp1v );
-
-    vec_st( level0v, 0x00, level );
-    vec_st( level1v, 0x10, level );
-    vec_st( level2v, 0x20, level );
-    vec_st( level3v, 0x30, level );
+    vec_st( tmp0v, 0x00, level );
+    vec_st( tmp1v, 0x10, level );
 }
 
-void x264_zigzag_scan_4x4_field_altivec( int level[16], int16_t dct[4][4] )
+void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] )
 {
     vec_s16_t dct0v, dct1v;
     vec_s16_t tmp0v, tmp1v;
-    vec_s32_t level0v, level1v, level2v, level3v;
 
     dct0v = vec_ld(0x00, (int16_t*)dct);
     dct1v = vec_ld(0x10, (int16_t*)dct);
@@ -496,22 +487,14 @@ void x264_zigzag_scan_4x4_field_altivec( int level[16], int16_t dct[4][4] )
     tmp0v = vec_perm( dct0v, dct1v, sel0 );
     tmp1v = dct1v;
 
-    level0v = vec_unpackh( tmp0v );
-    level1v = vec_unpackl( tmp0v );
-    level2v = vec_unpackh( tmp1v );
-    level3v = vec_unpackl( tmp1v );
-
-    vec_st( level0v, 0x00, level );
-    vec_st( level1v, 0x10, level );
-    vec_st( level2v, 0x20, level );
-    vec_st( level3v, 0x30, level );
+    vec_st( tmp0v, 0x00, level );
+    vec_st( tmp1v, 0x10, level );
 }
 
-void x264_zigzag_scan_4x4ac_frame_altivec( int level[15], int16_t dct[4][4] )
+void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] )
 {
     vec_s16_t dct0v, dct1v;
     vec_s16_t tmp0v, tmp1v;
-    vec_s32_t level0v, level1v, level2v, level3v;
 
     dct0v = vec_ld(0x00, (int16_t*)dct);
     dct1v = vec_ld(0x10, (int16_t*)dct);
@@ -522,22 +505,14 @@ void x264_zigzag_scan_4x4ac_frame_altivec( int level[15], int16_t dct[4][4] )
     tmp0v = vec_perm( dct0v, dct1v, sel0 );
     tmp1v = vec_perm( dct0v, dct1v, sel1 );
 
-    level0v = vec_unpackh( tmp0v );
-    level1v = vec_unpackl( tmp0v );
-    level2v = vec_unpackh( tmp1v );
-    level3v = vec_unpackl( tmp1v );
-
-    vec_st( level0v, 0x00, level );
-    vec_st( level1v, 0x10, level );
-    vec_st( level2v, 0x20, level );
-    vec_st( level3v, 0x30, level ); // FIXME?: write level[15]
+    vec_st( tmp0v, 0x00, level );
+    vec_st( tmp1v, 0x10, level );
 }
 
-void x264_zigzag_scan_4x4ac_field_altivec( int level[15], int16_t dct[4][4] )
+void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] )
 {
     vec_s16_t dct0v, dct1v;
     vec_s16_t tmp0v, tmp1v;
-    vec_s32_t level0v, level1v, level2v, level3v;
 
     dct0v = vec_ld(0x00, (int16_t*)dct);
     dct1v = vec_ld(0x10, (int16_t*)dct);
@@ -548,13 +523,6 @@ void x264_zigzag_scan_4x4ac_field_altivec( int level[15], int16_t dct[4][4] )
     tmp0v = vec_perm( dct0v, dct1v, sel0 );
     tmp1v = vec_perm( dct0v, dct1v, sel1 );
 
-    level0v = vec_unpackh( tmp0v );
-    level1v = vec_unpackl( tmp0v );
-    level2v = vec_unpackh( tmp1v );
-    level3v = vec_unpackl( tmp1v );
-
-    vec_st( level0v, 0x00, level );
-    vec_st( level1v, 0x10, level );
-    vec_st( level2v, 0x20, level );
-    vec_st( level3v, 0x30, level ); // FIXME?: write level[15]
+    vec_st( tmp0v, 0x00, level );
+    vec_st( tmp1v, 0x10, level );
 }
diff --git a/common/ppc/dct.h b/common/ppc/dct.h
index 7bcde43..fa3023b 100644
--- a/common/ppc/dct.h
+++ b/common/ppc/dct.h
@@ -44,10 +44,10 @@ void x264_sub16x16_dct8_altivec( int16_t dct[4][8][8],
 void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[8][8] );
 void x264_add16x16_idct8_altivec( uint8_t *dst, int16_t dct[4][8][8] );
 
-void x264_zigzag_scan_4x4_frame_altivec( int level[16], int16_t dct[4][4] );
-void x264_zigzag_scan_4x4ac_frame_altivec( int level[15], int16_t dct[4][4] );
+void x264_zigzag_scan_4x4_frame_altivec( int16_t level[16], int16_t dct[4][4] );
+void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] );
 
-void x264_zigzag_scan_4x4_field_altivec( int level[16], int16_t dct[4][4] );
-void x264_zigzag_scan_4x4ac_field_altivec( int level[15], int16_t dct[4][4] );
+void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] );
+void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] );
 
 #endif



More information about the x264-devel mailing list