[x264-devel] commit: Fix Altivec SATD with small strides (Manuel Rommel)
git at videolan.org
git at videolan.org
Wed Nov 10 10:12:28 CET 2010
x264 | branch: master | Manuel Rommel <maaanuuu at gmx.net> | Mon Oct 11 13:50:09 2010 -0700| [1fda37ab88917dd9c8746c0fb56dfee3e2c74f03] | committer: Jason Garrett-Glaser
Fix Altivec SATD with small strides
Fixes chroma ME and some of lookahead on PPC.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=1fda37ab88917dd9c8746c0fb56dfee3e2c74f03
---
common/ppc/pixel.c | 26 +++++++++++++-------------
common/ppc/ppccommon.h | 10 ++++++++++
2 files changed, 23 insertions(+), 13 deletions(-)
diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
index f75b401..0660126 100644
--- a/common/ppc/pixel.c
+++ b/common/ppc/pixel.c
@@ -283,19 +283,19 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
temp4v, temp5v, temp6v, temp7v;
vec_s32_t satdv;
- PREP_LOAD_SRC( pix1 );
- vec_u8_t _offset1v_ = vec_lvsl(0, pix2);
- vec_u8_t _offset2v_ = vec_lvsl(0, pix2 + i_pix2);
-
-
- VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff0v, offset1v );
- VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff1v, offset2v );
- VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff2v, offset1v );
- VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff3v, offset2v );
- VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff4v, offset1v );
- VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff5v, offset2v );
- VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff6v, offset1v );
- VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff7v, offset2v );
+ vec_u8_t _offset1_1v_ = vec_lvsl(0, pix1);
+ vec_u8_t _offset1_2v_ = vec_lvsl(0, pix1 + i_pix1);
+ vec_u8_t _offset2_1v_ = vec_lvsl(0, pix2);
+ vec_u8_t _offset2_2v_ = vec_lvsl(0, pix2 + i_pix2);
+
+ VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff0v, offset1_1v, offset2_1v );
+ VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff1v, offset1_2v, offset2_2v );
+ VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff2v, offset1_1v, offset2_1v );
+ VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff3v, offset1_2v, offset2_2v );
+ VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff4v, offset1_1v, offset2_1v );
+ VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff5v, offset1_2v, offset2_2v );
+ VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff6v, offset1_1v, offset2_1v );
+ VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff7v, offset1_2v, offset2_2v );
VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v,
temp0v, temp1v, temp2v, temp3v );
diff --git a/common/ppc/ppccommon.h b/common/ppc/ppccommon.h
index 11ab2a4..a3fb603 100644
--- a/common/ppc/ppccommon.h
+++ b/common/ppc/ppccommon.h
@@ -269,6 +269,16 @@ typedef union {
p1 += i1; \
p2 += i2
+#define VEC_DIFF_H_OFFSET(p1,i1,p2,i2,n,d,g1,g2) \
+ pix1v = (vec_s16_t)vec_perm( vec_ld( 0, p1 ), zero_u8v, _##g1##_ );\
+ pix1v = vec_u8_to_s16( pix1v ); \
+ VEC_LOAD( p2, pix2v, n, vec_s16_t, g2); \
+ pix2v = vec_u8_to_s16( pix2v ); \
+ d = vec_sub( pix1v, pix2v ); \
+ p1 += i1; \
+ p2 += i2
+
+
/***********************************************************************
* VEC_DIFF_HL
***********************************************************************
More information about the x264-devel mailing list