[x264-devel] commit: Fix Altivec SATD with small strides (Manuel Rommel)

git at videolan.org git at videolan.org
Wed Nov 10 10:12:28 CET 2010


x264 | branch: master | Manuel Rommel <maaanuuu at gmx.net> | Mon Oct 11 13:50:09 2010 -0700| [1fda37ab88917dd9c8746c0fb56dfee3e2c74f03] | committer: Jason Garrett-Glaser 

Fix Altivec SATD with small strides
Fixes chroma ME and some of lookahead on PPC.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=1fda37ab88917dd9c8746c0fb56dfee3e2c74f03
---

 common/ppc/pixel.c     |   26 +++++++++++++-------------
 common/ppc/ppccommon.h |   10 ++++++++++
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
index f75b401..0660126 100644
--- a/common/ppc/pixel.c
+++ b/common/ppc/pixel.c
@@ -283,19 +283,19 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
               temp4v, temp5v, temp6v, temp7v;
     vec_s32_t satdv;
 
-    PREP_LOAD_SRC( pix1 );
-    vec_u8_t _offset1v_ = vec_lvsl(0, pix2);
-    vec_u8_t _offset2v_ = vec_lvsl(0, pix2 + i_pix2);
-
-
-    VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff0v, offset1v );
-    VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff1v, offset2v );
-    VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff2v, offset1v );
-    VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff3v, offset2v );
-    VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff4v, offset1v );
-    VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff5v, offset2v );
-    VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff6v, offset1v );
-    VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff7v, offset2v );
+    vec_u8_t _offset1_1v_ = vec_lvsl(0, pix1);
+    vec_u8_t _offset1_2v_ = vec_lvsl(0, pix1 + i_pix1);
+    vec_u8_t _offset2_1v_ = vec_lvsl(0, pix2);
+    vec_u8_t _offset2_2v_ = vec_lvsl(0, pix2 + i_pix2);
+
+    VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff0v, offset1_1v, offset2_1v );
+    VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff1v, offset1_2v, offset2_2v );
+    VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff2v, offset1_1v, offset2_1v );
+    VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff3v, offset1_2v, offset2_2v );
+    VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff4v, offset1_1v, offset2_1v );
+    VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff5v, offset1_2v, offset2_2v );
+    VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff6v, offset1_1v, offset2_1v );
+    VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff7v, offset1_2v, offset2_2v );
 
     VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v,
                  temp0v, temp1v, temp2v, temp3v );
diff --git a/common/ppc/ppccommon.h b/common/ppc/ppccommon.h
index 11ab2a4..a3fb603 100644
--- a/common/ppc/ppccommon.h
+++ b/common/ppc/ppccommon.h
@@ -269,6 +269,16 @@ typedef union {
     p1   += i1;                                     \
     p2   += i2
 
+#define VEC_DIFF_H_OFFSET(p1,i1,p2,i2,n,d,g1,g2)    \
+    pix1v = (vec_s16_t)vec_perm( vec_ld( 0, p1 ), zero_u8v, _##g1##_ );\
+    pix1v = vec_u8_to_s16( pix1v );                 \
+    VEC_LOAD( p2, pix2v, n, vec_s16_t, g2);         \
+    pix2v = vec_u8_to_s16( pix2v );                 \
+    d     = vec_sub( pix1v, pix2v );                \
+    p1   += i1;                                     \
+    p2   += i2
+
+
 /***********************************************************************
  * VEC_DIFF_HL
  ***********************************************************************



More information about the x264-devel mailing list