[x264-devel] commit: Add Altivec implementation of all the remaining 16x16 predict routines. ( Guillaume Poirier )

Wed Jan 14 21:15:29 CET 2009

x264 | branch: master | Guillaume Poirier <gpoirier at mplayerhq.hu> | Wed Jan 14 21:13:58 2009 +0100| [264e447cc4ce267d7e4d078b080716093a78a2c8] | committer: Guillaume Poirier 

Add Altivec implementation of all the remaining 16x16 predict routines.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=264e447cc4ce267d7e4d078b080716093a78a2c8
---

 common/ppc/ppccommon.h |    5 ++
 common/ppc/predict.c   |  110 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/common/ppc/ppccommon.h b/common/ppc/ppccommon.h
index ac0a9ca..3db035c 100644
--- a/common/ppc/ppccommon.h
+++ b/common/ppc/ppccommon.h
@@ -56,6 +56,11 @@ typedef union {
   vector signed short v;
 } vect_sshort_u;
 
+typedef union {
+  unsigned char s[16];
+  vector unsigned char v;
+} vec_u8_u;
+
 /***********************************************************************
  * Null vector
  **********************************************************************/
diff --git a/common/ppc/predict.c b/common/ppc/predict.c
index 1fcd72b..62a5578 100644
--- a/common/ppc/predict.c
+++ b/common/ppc/predict.c
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.c: h264 encoder
  *****************************************************************************
- * Copyright (C) 2007-2008 Guillaume Poirier <gpoirier at mplayerhq.hu>
+ * Copyright (C) 2007-2009 Guillaume Poirier <gpoirier at mplayerhq.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -23,6 +23,10 @@
 #include "pixel.h"
 #include "ppccommon.h"
 
+/****************************************************************************
+ * 16x16 prediction for intra luma block
+ ****************************************************************************/
+
 static void predict_16x16_p_altivec( uint8_t *src )
 {
     int16_t a, b, c, i;
@@ -74,10 +78,112 @@ static void predict_16x16_p_altivec( uint8_t *src )
     }
 }
 
+#define PREDICT_16x16_DC_ALTIVEC(v) \
+for (i=0; i<16; i+=2)               \
+{                                   \
+    vec_st(v, 0, src);              \
+    vec_st(v, FDEC_STRIDE, src);    \
+    src += FDEC_STRIDE*2;           \
+}
+
+static void predict_16x16_dc_altivec( uint8_t *src )
+{
+    uint32_t dc = 0;
+    int i;
+
+    for( i = 0; i < 16; i++ )
+    {
+        dc += src[-1 + i * FDEC_STRIDE];
+        dc += src[i - FDEC_STRIDE];
+    }
+    vec_u8_u v ; v.s[0] = (( dc + 16 ) >> 5);
+    vec_u8_t bc_v = vec_splat(v.v, 0);
+
+    PREDICT_16x16_DC_ALTIVEC(bc_v);
+}
+
+static void predict_16x16_dc_left_altivec( uint8_t *src )
+{
+    uint32_t dc = 0;
+    int i;
+
+    for( i = 0; i < 16; i++ )
+    {
+        dc += src[-1 + i * FDEC_STRIDE];
+    }
+    vec_u8_u v ; v.s[0] = (( dc + 8 ) >> 4);
+    vec_u8_t bc_v = vec_splat(v.v, 0);
+
+    PREDICT_16x16_DC_ALTIVEC(bc_v);
+}
+
+static void predict_16x16_dc_top_altivec( uint8_t *src )
+{
+    uint32_t dc = 0;
+    int i;
+
+    for( i = 0; i < 16; i++ )
+    {
+        dc += src[i - FDEC_STRIDE];
+    }
+    vec_u8_u v ; v.s[0] = (( dc + 8 ) >> 4);
+    vec_u8_t bc_v = vec_splat(v.v, 0);
+
+    PREDICT_16x16_DC_ALTIVEC(bc_v);
+}
+
+static void predict_16x16_dc_128_altivec( uint8_t *src )
+{
+    int i;
+    /* test if generating the constant is faster than loading it.
+    vector unsigned int bc_v = (vector unsigned int)CV(0x80808080, 0x80808080, 0x80808080, 0x80808080);
+    */
+    vec_u8_t bc_v = vec_vslb((vec_u8_t)vec_splat_u8(1),(vec_u8_t)vec_splat_u8(7));
+    PREDICT_16x16_DC_ALTIVEC(bc_v);
+}
+
+static void predict_16x16_h_altivec( uint8_t *src )
+{
+    int i;
+
+    for( i = 0; i < 16; i++ )
+    {
+        vec_u8_t v = vec_ld(-1, src);
+        vec_u8_t v_v = vec_splat(v, 15);
+        vec_st(v_v, 0, src);
+
+        src += FDEC_STRIDE;
+    }
+}
+
+static void predict_16x16_v_altivec( uint8_t *src )
+{
+    vect_int_u v;
+    v.s[0] = *(uint32_t*)&src[ 0-FDEC_STRIDE];
+    v.s[1] = *(uint32_t*)&src[ 4-FDEC_STRIDE];
+    v.s[2] = *(uint32_t*)&src[ 8-FDEC_STRIDE];
+    v.s[3] = *(uint32_t*)&src[12-FDEC_STRIDE];
+
+    int i;
+
+    for( i = 0; i < 16; i++ )
+    {
+        vec_st(v.v, 0, (uint32_t*)src);
+        src += FDEC_STRIDE;
+    }
+}
+
+
 /****************************************************************************
  * Exported functions:
  ****************************************************************************/
 void x264_predict_16x16_init_altivec( x264_predict_t pf[7] )
 {
-    pf[I_PRED_16x16_P]       = predict_16x16_p_altivec;
+    pf[I_PRED_16x16_V ]      = predict_16x16_v_altivec;
+    pf[I_PRED_16x16_H ]      = predict_16x16_h_altivec;
+    pf[I_PRED_16x16_DC]      = predict_16x16_dc_altivec;
+    pf[I_PRED_16x16_P ]      = predict_16x16_p_altivec;
+    pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left_altivec;
+    pf[I_PRED_16x16_DC_TOP ] = predict_16x16_dc_top_altivec;
+    pf[I_PRED_16x16_DC_128 ] = predict_16x16_dc_128_altivec;
 }